From 1bc5aee63eb72b341f506ad058502cd0361f0d10 Mon Sep 17 00:00:00 2001
From: Ben Cheng
Date: Tue, 25 Mar 2014 22:37:19 -0700
Subject: Initial checkin of GCC 4.9.0 from trunk (r208799).

Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
---
 gcc-4.9/gcc/config/README | 5 + gcc-4.9/gcc/config/aarch64/aarch64-arches.def | 29 + gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 1253 + gcc-4.9/gcc/config/aarch64/aarch64-cores.def | 42 + gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h | 33 + gcc-4.9/gcc/config/aarch64/aarch64-elf.h | 161 + gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 47 + gcc-4.9/gcc/config/aarch64/aarch64-modes.def | 55 + .../config/aarch64/aarch64-option-extensions.def | 38 + gcc-4.9/gcc/config/aarch64/aarch64-opts.h | 64 + gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 292 + .../gcc/config/aarch64/aarch64-simd-builtins.def | 395 + gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 4363 ++ gcc-4.9/gcc/config/aarch64/aarch64-tune.md | 5 + gcc-4.9/gcc/config/aarch64/aarch64.c | 8544 ++++ gcc-4.9/gcc/config/aarch64/aarch64.h | 873 + gcc-4.9/gcc/config/aarch64/aarch64.md | 3642 ++ gcc-4.9/gcc/config/aarch64/aarch64.opt | 118 + gcc-4.9/gcc/config/aarch64/arm_neon.h | 25403 ++++++++++ gcc-4.9/gcc/config/aarch64/atomics.md | 382 + gcc-4.9/gcc/config/aarch64/biarchilp32.h | 29 + gcc-4.9/gcc/config/aarch64/biarchlp64.h | 29 + gcc-4.9/gcc/config/aarch64/constraints.md | 188 + gcc-4.9/gcc/config/aarch64/gentune.sh | 32 + gcc-4.9/gcc/config/aarch64/iterators.md | 997 + gcc-4.9/gcc/config/aarch64/predicates.md | 302 + gcc-4.9/gcc/config/aarch64/t-aarch64 | 45 + gcc-4.9/gcc/config/aarch64/t-aarch64-linux | 31 + gcc-4.9/gcc/config/alpha/alpha-modes.def | 27 + gcc-4.9/gcc/config/alpha/alpha-protos.h | 117 + gcc-4.9/gcc/config/alpha/alpha.c | 9898 ++++ gcc-4.9/gcc/config/alpha/alpha.h | 1074 + gcc-4.9/gcc/config/alpha/alpha.md | 6113 +++ gcc-4.9/gcc/config/alpha/alpha.opt | 130 + gcc-4.9/gcc/config/alpha/constraints.md | 120 + gcc-4.9/gcc/config/alpha/driver-alpha.c | 99 + gcc-4.9/gcc/config/alpha/elf.h | 168 + gcc-4.9/gcc/config/alpha/elf.opt | 29 + gcc-4.9/gcc/config/alpha/ev4.md | 161 + gcc-4.9/gcc/config/alpha/ev5.md | 194 + gcc-4.9/gcc/config/alpha/ev6.md | 181 + gcc-4.9/gcc/config/alpha/freebsd.h | 68 + gcc-4.9/gcc/config/alpha/linux-elf.h | 53 + gcc-4.9/gcc/config/alpha/linux.h | 102 + gcc-4.9/gcc/config/alpha/netbsd.h | 72 + gcc-4.9/gcc/config/alpha/openbsd.h | 45 + gcc-4.9/gcc/config/alpha/predicates.md | 653 + gcc-4.9/gcc/config/alpha/sync.md | 343 + gcc-4.9/gcc/config/alpha/t-linux | 1 + gcc-4.9/gcc/config/alpha/t-vms | 21 + gcc-4.9/gcc/config/alpha/vms.h | 306 + gcc-4.9/gcc/config/alpha/x-alpha | 3 + gcc-4.9/gcc/config/arc/arc-modes.def | 37 + gcc-4.9/gcc/config/arc/arc-opts.h | 28 + gcc-4.9/gcc/config/arc/arc-protos.h | 118 + gcc-4.9/gcc/config/arc/arc-simd.h | 186 + gcc-4.9/gcc/config/arc/arc.c | 9360 ++++ gcc-4.9/gcc/config/arc/arc.h | 1696 + gcc-4.9/gcc/config/arc/arc.md | 5165 ++ gcc-4.9/gcc/config/arc/arc.opt | 390 + gcc-4.9/gcc/config/arc/arc600.md | 63 + gcc-4.9/gcc/config/arc/arc700.md | 170 + gcc-4.9/gcc/config/arc/constraints.md | 399 + gcc-4.9/gcc/config/arc/fpx.md | 674 + gcc-4.9/gcc/config/arc/predicates.md | 811 + gcc-4.9/gcc/config/arc/simdext.md | 1292 + gcc-4.9/gcc/config/arc/t-arc-newlib | 38 + gcc-4.9/gcc/config/arc/t-arc-uClibc | 20 + gcc-4.9/gcc/config/arm/README-interworking | 749 + gcc-4.9/gcc/config/arm/aarch-common-protos.h | 134 + gcc-4.9/gcc/config/arm/aarch-common.c | 353 + gcc-4.9/gcc/config/arm/aarch-cost-tables.h | 325 + gcc-4.9/gcc/config/arm/aout.h | 303 + 
gcc-4.9/gcc/config/arm/arm-arches.def | 60 + gcc-4.9/gcc/config/arm/arm-c.c | 44 + gcc-4.9/gcc/config/arm/arm-cores.def | 159 + gcc-4.9/gcc/config/arm/arm-fixed.md | 429 + gcc-4.9/gcc/config/arm/arm-fpus.def | 46 + gcc-4.9/gcc/config/arm/arm-generic.md | 152 + gcc-4.9/gcc/config/arm/arm-ldmstm.ml | 345 + gcc-4.9/gcc/config/arm/arm-modes.def | 84 + gcc-4.9/gcc/config/arm/arm-opts.h | 75 + gcc-4.9/gcc/config/arm/arm-protos.h | 297 + gcc-4.9/gcc/config/arm/arm-tables.opt | 439 + gcc-4.9/gcc/config/arm/arm-tune.md | 34 + gcc-4.9/gcc/config/arm/arm.c | 31119 ++++++++++++ gcc-4.9/gcc/config/arm/arm.h | 2398 + gcc-4.9/gcc/config/arm/arm.md | 12928 +++++ gcc-4.9/gcc/config/arm/arm.opt | 277 + gcc-4.9/gcc/config/arm/arm1020e.md | 385 + gcc-4.9/gcc/config/arm/arm1026ejs.md | 250 + gcc-4.9/gcc/config/arm/arm1136jfs.md | 387 + gcc-4.9/gcc/config/arm/arm926ejs.md | 198 + gcc-4.9/gcc/config/arm/arm_acle.h | 100 + gcc-4.9/gcc/config/arm/arm_neon.h | 13429 ++++++ gcc-4.9/gcc/config/arm/arm_neon_builtins.def | 212 + gcc-4.9/gcc/config/arm/bpabi.h | 163 + gcc-4.9/gcc/config/arm/coff.h | 82 + gcc-4.9/gcc/config/arm/constraints.md | 438 + gcc-4.9/gcc/config/arm/cortex-a15-neon.md | 677 + gcc-4.9/gcc/config/arm/cortex-a15.md | 186 + gcc-4.9/gcc/config/arm/cortex-a5.md | 311 + gcc-4.9/gcc/config/arm/cortex-a53.md | 309 + gcc-4.9/gcc/config/arm/cortex-a7.md | 394 + gcc-4.9/gcc/config/arm/cortex-a8-neon.md | 1534 + gcc-4.9/gcc/config/arm/cortex-a8.md | 279 + gcc-4.9/gcc/config/arm/cortex-a9-neon.md | 1471 + gcc-4.9/gcc/config/arm/cortex-a9.md | 283 + gcc-4.9/gcc/config/arm/cortex-m4-fpu.md | 117 + gcc-4.9/gcc/config/arm/cortex-m4.md | 128 + gcc-4.9/gcc/config/arm/cortex-r4.md | 299 + gcc-4.9/gcc/config/arm/cortex-r4f.md | 161 + gcc-4.9/gcc/config/arm/crypto.def | 34 + gcc-4.9/gcc/config/arm/crypto.md | 86 + gcc-4.9/gcc/config/arm/driver-arm.c | 151 + gcc-4.9/gcc/config/arm/elf.h | 159 + gcc-4.9/gcc/config/arm/fa526.md | 173 + gcc-4.9/gcc/config/arm/fa606te.md | 182 + gcc-4.9/gcc/config/arm/fa626te.md | 177 + gcc-4.9/gcc/config/arm/fa726te.md | 223 + gcc-4.9/gcc/config/arm/fmp626.md | 191 + gcc-4.9/gcc/config/arm/genopt.sh | 95 + gcc-4.9/gcc/config/arm/gentune.sh | 29 + gcc-4.9/gcc/config/arm/iterators.md | 585 + gcc-4.9/gcc/config/arm/iwmmxt.md | 1775 + gcc-4.9/gcc/config/arm/iwmmxt2.md | 903 + gcc-4.9/gcc/config/arm/ldmstm.md | 1225 + gcc-4.9/gcc/config/arm/ldrdstrd.md | 260 + gcc-4.9/gcc/config/arm/linux-eabi.h | 122 + gcc-4.9/gcc/config/arm/linux-elf.h | 115 + gcc-4.9/gcc/config/arm/linux-gas.h | 55 + gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md | 189 + gcc-4.9/gcc/config/arm/marvell-pj4.md | 232 + gcc-4.9/gcc/config/arm/mmintrin.h | 1836 + gcc-4.9/gcc/config/arm/neon-docgen.ml | 424 + gcc-4.9/gcc/config/arm/neon-gen.ml | 520 + gcc-4.9/gcc/config/arm/neon-testgen.ml | 305 + gcc-4.9/gcc/config/arm/neon.md | 5808 +++ gcc-4.9/gcc/config/arm/neon.ml | 2355 + gcc-4.9/gcc/config/arm/netbsd-elf.h | 154 + gcc-4.9/gcc/config/arm/predicates.md | 677 + gcc-4.9/gcc/config/arm/rtems-eabi.h | 29 + gcc-4.9/gcc/config/arm/semi.h | 68 + gcc-4.9/gcc/config/arm/symbian.h | 101 + gcc-4.9/gcc/config/arm/sync.md | 472 + gcc-4.9/gcc/config/arm/t-aprofile | 178 + gcc-4.9/gcc/config/arm/t-arm | 100 + gcc-4.9/gcc/config/arm/t-arm-elf | 90 + gcc-4.9/gcc/config/arm/t-bpabi | 1 + gcc-4.9/gcc/config/arm/t-linux-androideabi | 10 + gcc-4.9/gcc/config/arm/t-linux-eabi | 31 + gcc-4.9/gcc/config/arm/t-rtems-eabi | 47 + gcc-4.9/gcc/config/arm/t-symbian | 26 + gcc-4.9/gcc/config/arm/t-vxworks | 24 + gcc-4.9/gcc/config/arm/thumb2.md | 1495 + 
gcc-4.9/gcc/config/arm/types.md | 1077 + gcc-4.9/gcc/config/arm/uclinux-eabi.h | 67 + gcc-4.9/gcc/config/arm/uclinux-elf.h | 84 + gcc-4.9/gcc/config/arm/unknown-elf.h | 96 + gcc-4.9/gcc/config/arm/unspecs.md | 305 + gcc-4.9/gcc/config/arm/vec-common.md | 136 + gcc-4.9/gcc/config/arm/vfp.md | 1330 + gcc-4.9/gcc/config/arm/vfp11.md | 93 + gcc-4.9/gcc/config/arm/vxworks.h | 109 + gcc-4.9/gcc/config/arm/vxworks.opt | 59 + gcc-4.9/gcc/config/arm/x-arm | 3 + gcc-4.9/gcc/config/avr/avr-arch.h | 156 + gcc-4.9/gcc/config/avr/avr-c.c | 402 + gcc-4.9/gcc/config/avr/avr-devices.c | 114 + gcc-4.9/gcc/config/avr/avr-dimode.md | 479 + gcc-4.9/gcc/config/avr/avr-fixed.md | 497 + gcc-4.9/gcc/config/avr/avr-log.c | 351 + gcc-4.9/gcc/config/avr/avr-mcus.def | 323 + gcc-4.9/gcc/config/avr/avr-modes.def | 33 + gcc-4.9/gcc/config/avr/avr-protos.h | 164 + gcc-4.9/gcc/config/avr/avr-stdint.h | 66 + gcc-4.9/gcc/config/avr/avr-tables.opt | 766 + gcc-4.9/gcc/config/avr/avr.c | 12522 +++++ gcc-4.9/gcc/config/avr/avr.h | 606 + gcc-4.9/gcc/config/avr/avr.md | 6358 +++ gcc-4.9/gcc/config/avr/avr.opt | 84 + gcc-4.9/gcc/config/avr/avrlibc.h | 30 + gcc-4.9/gcc/config/avr/builtins.def | 169 + gcc-4.9/gcc/config/avr/constraints.md | 238 + gcc-4.9/gcc/config/avr/driver-avr.c | 150 + gcc-4.9/gcc/config/avr/elf.h | 41 + gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c | 144 + gcc-4.9/gcc/config/avr/genmultilib.awk | 216 + gcc-4.9/gcc/config/avr/genopt.sh | 59 + gcc-4.9/gcc/config/avr/predicates.md | 275 + gcc-4.9/gcc/config/avr/rtems.h | 27 + gcc-4.9/gcc/config/avr/stdfix.h | 236 + gcc-4.9/gcc/config/avr/t-avr | 83 + gcc-4.9/gcc/config/avr/t-multilib | 269 + gcc-4.9/gcc/config/avr/t-rtems | 3 + gcc-4.9/gcc/config/bfin/bfin-modes.def | 28 + gcc-4.9/gcc/config/bfin/bfin-opts.h | 59 + gcc-4.9/gcc/config/bfin/bfin-protos.h | 117 + gcc-4.9/gcc/config/bfin/bfin.c | 5834 +++ gcc-4.9/gcc/config/bfin/bfin.h | 1156 + gcc-4.9/gcc/config/bfin/bfin.md | 4202 ++ gcc-4.9/gcc/config/bfin/bfin.opt | 118 + gcc-4.9/gcc/config/bfin/constraints.md | 225 + gcc-4.9/gcc/config/bfin/elf.h | 74 + gcc-4.9/gcc/config/bfin/linux.h | 52 + gcc-4.9/gcc/config/bfin/predicates.md | 249 + gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh | 81 + gcc-4.9/gcc/config/bfin/rtems.h | 28 + gcc-4.9/gcc/config/bfin/sync.md | 178 + gcc-4.9/gcc/config/bfin/t-bfin-elf | 49 + gcc-4.9/gcc/config/bfin/t-bfin-linux | 52 + gcc-4.9/gcc/config/bfin/t-bfin-uclinux | 48 + gcc-4.9/gcc/config/bfin/t-rtems | 6 + gcc-4.9/gcc/config/bfin/uclinux.h | 38 + gcc-4.9/gcc/config/c6x/c6x-isas.def | 37 + gcc-4.9/gcc/config/c6x/c6x-modes.def | 24 + gcc-4.9/gcc/config/c6x/c6x-mult.md | 844 + gcc-4.9/gcc/config/c6x/c6x-mult.md.in | 421 + gcc-4.9/gcc/config/c6x/c6x-opts.h | 35 + gcc-4.9/gcc/config/c6x/c6x-protos.h | 65 + gcc-4.9/gcc/config/c6x/c6x-sched.md | 934 + gcc-4.9/gcc/config/c6x/c6x-sched.md.in | 230 + gcc-4.9/gcc/config/c6x/c6x-tables.opt | 43 + gcc-4.9/gcc/config/c6x/c6x.c | 6846 +++ gcc-4.9/gcc/config/c6x/c6x.h | 618 + gcc-4.9/gcc/config/c6x/c6x.md | 3136 ++ gcc-4.9/gcc/config/c6x/c6x.opt | 67 + gcc-4.9/gcc/config/c6x/c6x_intrinsics.h | 194 + gcc-4.9/gcc/config/c6x/constraints.md | 174 + gcc-4.9/gcc/config/c6x/elf-common.h | 37 + gcc-4.9/gcc/config/c6x/elf.h | 35 + gcc-4.9/gcc/config/c6x/genmult.sh | 33 + gcc-4.9/gcc/config/c6x/genopt.sh | 59 + gcc-4.9/gcc/config/c6x/gensched.sh | 44 + gcc-4.9/gcc/config/c6x/predicates.md | 226 + gcc-4.9/gcc/config/c6x/sync.md | 270 + gcc-4.9/gcc/config/c6x/t-c6x | 42 + gcc-4.9/gcc/config/c6x/t-c6x-elf | 30 + gcc-4.9/gcc/config/c6x/t-c6x-uclinux | 3 + 
gcc-4.9/gcc/config/c6x/uclinux-elf.h | 63 + gcc-4.9/gcc/config/cr16/constraints.md | 81 + gcc-4.9/gcc/config/cr16/cr16-protos.h | 99 + gcc-4.9/gcc/config/cr16/cr16.c | 2194 + gcc-4.9/gcc/config/cr16/cr16.h | 586 + gcc-4.9/gcc/config/cr16/cr16.md | 1084 + gcc-4.9/gcc/config/cr16/cr16.opt | 51 + gcc-4.9/gcc/config/cr16/predicates.md | 225 + gcc-4.9/gcc/config/cr16/t-cr16 | 25 + gcc-4.9/gcc/config/cris/constraints.md | 164 + gcc-4.9/gcc/config/cris/cris-protos.h | 67 + gcc-4.9/gcc/config/cris/cris.c | 4359 ++ gcc-4.9/gcc/config/cris/cris.h | 1081 + gcc-4.9/gcc/config/cris/cris.md | 5157 ++ gcc-4.9/gcc/config/cris/cris.opt | 202 + gcc-4.9/gcc/config/cris/elf.opt | 25 + gcc-4.9/gcc/config/cris/linux.h | 150 + gcc-4.9/gcc/config/cris/linux.opt | 33 + gcc-4.9/gcc/config/cris/predicates.md | 178 + gcc-4.9/gcc/config/cris/sync.md | 314 + gcc-4.9/gcc/config/cris/t-cris | 29 + gcc-4.9/gcc/config/cris/t-elfmulti | 31 + gcc-4.9/gcc/config/cris/t-linux | 5 + gcc-4.9/gcc/config/darwin-c.c | 775 + gcc-4.9/gcc/config/darwin-driver.c | 224 + gcc-4.9/gcc/config/darwin-f.c | 60 + gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def | 113 + gcc-4.9/gcc/config/darwin-protos.h | 127 + gcc-4.9/gcc/config/darwin-sections.def | 195 + gcc-4.9/gcc/config/darwin.c | 3663 ++ gcc-4.9/gcc/config/darwin.h | 921 + gcc-4.9/gcc/config/darwin.opt | 393 + gcc-4.9/gcc/config/darwin10.h | 34 + gcc-4.9/gcc/config/darwin9.h | 65 + gcc-4.9/gcc/config/dbx.h | 27 + gcc-4.9/gcc/config/dbxcoff.h | 62 + gcc-4.9/gcc/config/dbxelf.h | 68 + gcc-4.9/gcc/config/default-c.c | 33 + gcc-4.9/gcc/config/elfos.h | 438 + gcc-4.9/gcc/config/epiphany/constraints.md | 130 + gcc-4.9/gcc/config/epiphany/epiphany-modes.def | 40 + gcc-4.9/gcc/config/epiphany/epiphany-protos.h | 64 + gcc-4.9/gcc/config/epiphany/epiphany-sched.md | 145 + gcc-4.9/gcc/config/epiphany/epiphany.c | 2938 ++ gcc-4.9/gcc/config/epiphany/epiphany.h | 945 + gcc-4.9/gcc/config/epiphany/epiphany.md | 2812 ++ gcc-4.9/gcc/config/epiphany/epiphany.opt | 148 + gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h | 27 + gcc-4.9/gcc/config/epiphany/mode-switch-use.c | 109 + gcc-4.9/gcc/config/epiphany/predicates.md | 368 + gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c | 200 + gcc-4.9/gcc/config/epiphany/t-epiphany | 38 + gcc-4.9/gcc/config/flat.h | 22 + gcc-4.9/gcc/config/fr30/constraints.md | 71 + gcc-4.9/gcc/config/fr30/fr30-protos.h | 32 + gcc-4.9/gcc/config/fr30/fr30.c | 1062 + gcc-4.9/gcc/config/fr30/fr30.h | 845 + gcc-4.9/gcc/config/fr30/fr30.md | 1267 + gcc-4.9/gcc/config/fr30/fr30.opt | 27 + gcc-4.9/gcc/config/fr30/predicates.md | 123 + gcc-4.9/gcc/config/freebsd-nthr.h | 21 + gcc-4.9/gcc/config/freebsd-spec.h | 138 + gcc-4.9/gcc/config/freebsd-stdint.h | 56 + gcc-4.9/gcc/config/freebsd.h | 93 + gcc-4.9/gcc/config/freebsd.opt | 64 + gcc-4.9/gcc/config/frv/constraints.md | 174 + gcc-4.9/gcc/config/frv/frv-asm.h | 48 + gcc-4.9/gcc/config/frv/frv-modes.def | 34 + gcc-4.9/gcc/config/frv/frv-opts.h | 37 + gcc-4.9/gcc/config/frv/frv-protos.h | 178 + gcc-4.9/gcc/config/frv/frv.c | 9655 ++++ gcc-4.9/gcc/config/frv/frv.h | 2133 + gcc-4.9/gcc/config/frv/frv.md | 8021 ++++ gcc-4.9/gcc/config/frv/frv.opt | 237 + gcc-4.9/gcc/config/frv/linux.h | 74 + gcc-4.9/gcc/config/frv/predicates.md | 1543 + gcc-4.9/gcc/config/frv/t-frv | 36 + gcc-4.9/gcc/config/frv/t-linux | 24 + gcc-4.9/gcc/config/fused-madd.opt | 24 + gcc-4.9/gcc/config/g.opt | 29 + gcc-4.9/gcc/config/glibc-c.c | 37 + gcc-4.9/gcc/config/glibc-stdint.h | 55 + gcc-4.9/gcc/config/gnu-user.h | 128 + gcc-4.9/gcc/config/gnu-user.opt | 38 + 
gcc-4.9/gcc/config/gnu.h | 41 + gcc-4.9/gcc/config/h8300/constraints.md | 214 + gcc-4.9/gcc/config/h8300/elf.h | 44 + gcc-4.9/gcc/config/h8300/genmova.sh | 198 + gcc-4.9/gcc/config/h8300/h8300-protos.h | 118 + gcc-4.9/gcc/config/h8300/h8300.c | 6102 +++ gcc-4.9/gcc/config/h8300/h8300.h | 789 + gcc-4.9/gcc/config/h8300/h8300.md | 6322 +++ gcc-4.9/gcc/config/h8300/h8300.opt | 71 + gcc-4.9/gcc/config/h8300/mova.md | 858 + gcc-4.9/gcc/config/h8300/predicates.md | 504 + gcc-4.9/gcc/config/h8300/rtems.h | 29 + gcc-4.9/gcc/config/h8300/t-h8300 | 29 + gcc-4.9/gcc/config/h8300/t-rtems | 7 + gcc-4.9/gcc/config/host-darwin.c | 77 + gcc-4.9/gcc/config/host-darwin.h | 27 + gcc-4.9/gcc/config/host-hpux.c | 129 + gcc-4.9/gcc/config/host-linux.c | 228 + gcc-4.9/gcc/config/host-openbsd.c | 85 + gcc-4.9/gcc/config/host-solaris.c | 125 + gcc-4.9/gcc/config/hpux-stdint.h | 34 + gcc-4.9/gcc/config/hpux11.opt | 32 + gcc-4.9/gcc/config/i386/adxintrin.h | 49 + gcc-4.9/gcc/config/i386/ammintrin.h | 93 + gcc-4.9/gcc/config/i386/athlon.md | 1186 + gcc-4.9/gcc/config/i386/atom.md | 794 + gcc-4.9/gcc/config/i386/att.h | 91 + gcc-4.9/gcc/config/i386/avx2intrin.h | 1889 + gcc-4.9/gcc/config/i386/avx512cdintrin.h | 184 + gcc-4.9/gcc/config/i386/avx512erintrin.h | 394 + gcc-4.9/gcc/config/i386/avx512fintrin.h | 12832 +++++ gcc-4.9/gcc/config/i386/avx512pfintrin.h | 212 + gcc-4.9/gcc/config/i386/avxintrin.h | 1463 + gcc-4.9/gcc/config/i386/avxmath.h | 28 + gcc-4.9/gcc/config/i386/bdver1.md | 800 + gcc-4.9/gcc/config/i386/bdver3.md | 748 + gcc-4.9/gcc/config/i386/biarch64.h | 29 + gcc-4.9/gcc/config/i386/biarchx32.h | 28 + gcc-4.9/gcc/config/i386/bmi2intrin.h | 109 + gcc-4.9/gcc/config/i386/bmiintrin.h | 138 + gcc-4.9/gcc/config/i386/bmmintrin.h | 29 + gcc-4.9/gcc/config/i386/bsd.h | 99 + gcc-4.9/gcc/config/i386/btver2.md | 1391 + gcc-4.9/gcc/config/i386/constraints.md | 246 + gcc-4.9/gcc/config/i386/core2.md | 691 + gcc-4.9/gcc/config/i386/cpuid.h | 277 + gcc-4.9/gcc/config/i386/cross-stdarg.h | 72 + gcc-4.9/gcc/config/i386/crtdll.h | 42 + gcc-4.9/gcc/config/i386/cygming.h | 487 + gcc-4.9/gcc/config/i386/cygming.opt | 60 + gcc-4.9/gcc/config/i386/cygwin-stdint.h | 94 + gcc-4.9/gcc/config/i386/cygwin-w64.h | 83 + gcc-4.9/gcc/config/i386/cygwin.h | 146 + gcc-4.9/gcc/config/i386/darwin.h | 313 + gcc-4.9/gcc/config/i386/darwin64.h | 32 + gcc-4.9/gcc/config/i386/djgpp-stdint.h | 62 + gcc-4.9/gcc/config/i386/djgpp.h | 178 + gcc-4.9/gcc/config/i386/djgpp.opt | 28 + gcc-4.9/gcc/config/i386/driver-i386.c | 913 + gcc-4.9/gcc/config/i386/emmintrin.h | 1541 + gcc-4.9/gcc/config/i386/f16cintrin.h | 98 + gcc-4.9/gcc/config/i386/fma4intrin.h | 241 + gcc-4.9/gcc/config/i386/fmaintrin.h | 302 + gcc-4.9/gcc/config/i386/freebsd.h | 149 + gcc-4.9/gcc/config/i386/freebsd64.h | 44 + gcc-4.9/gcc/config/i386/fxsrintrin.h | 73 + gcc-4.9/gcc/config/i386/gas.h | 124 + gcc-4.9/gcc/config/i386/geode.md | 151 + gcc-4.9/gcc/config/i386/gmm_malloc.h | 74 + gcc-4.9/gcc/config/i386/gnu-user-common.h | 72 + gcc-4.9/gcc/config/i386/gnu-user.h | 164 + gcc-4.9/gcc/config/i386/gnu-user64.h | 99 + gcc-4.9/gcc/config/i386/gnu.h | 47 + gcc-4.9/gcc/config/i386/gstabs.h | 7 + gcc-4.9/gcc/config/i386/host-cygwin.c | 78 + gcc-4.9/gcc/config/i386/host-i386-darwin.c | 30 + gcc-4.9/gcc/config/i386/host-mingw32.c | 198 + gcc-4.9/gcc/config/i386/i386-builtin-types.awk | 280 + gcc-4.9/gcc/config/i386/i386-builtin-types.def | 808 + gcc-4.9/gcc/config/i386/i386-c.c | 546 + gcc-4.9/gcc/config/i386/i386-interix.h | 346 + gcc-4.9/gcc/config/i386/i386-modes.def | 99 + 
gcc-4.9/gcc/config/i386/i386-opts.h | 96 + gcc-4.9/gcc/config/i386/i386-protos.h | 332 + gcc-4.9/gcc/config/i386/i386.c | 47138 +++++++++++++++++++ gcc-4.9/gcc/config/i386/i386.h | 2552 + gcc-4.9/gcc/config/i386/i386.md | 18044 +++++++ gcc-4.9/gcc/config/i386/i386.opt | 796 + gcc-4.9/gcc/config/i386/i386elf.h | 103 + gcc-4.9/gcc/config/i386/ia32intrin.h | 293 + gcc-4.9/gcc/config/i386/immintrin.h | 177 + gcc-4.9/gcc/config/i386/interix.opt | 34 + gcc-4.9/gcc/config/i386/k6.md | 266 + gcc-4.9/gcc/config/i386/kfreebsd-gnu.h | 22 + gcc-4.9/gcc/config/i386/kfreebsd-gnu64.h | 27 + gcc-4.9/gcc/config/i386/knetbsd-gnu.h | 21 + gcc-4.9/gcc/config/i386/kopensolaris-gnu.h | 21 + gcc-4.9/gcc/config/i386/linux-common.h | 55 + gcc-4.9/gcc/config/i386/linux.h | 23 + gcc-4.9/gcc/config/i386/linux64.h | 32 + gcc-4.9/gcc/config/i386/lwpintrin.h | 105 + gcc-4.9/gcc/config/i386/lynx.h | 87 + gcc-4.9/gcc/config/i386/lzcntintrin.h | 75 + gcc-4.9/gcc/config/i386/mingw-pthread.h | 21 + gcc-4.9/gcc/config/i386/mingw-stdint.h | 50 + gcc-4.9/gcc/config/i386/mingw-w64.h | 86 + gcc-4.9/gcc/config/i386/mingw-w64.opt | 25 + gcc-4.9/gcc/config/i386/mingw.opt | 35 + gcc-4.9/gcc/config/i386/mingw32.h | 248 + gcc-4.9/gcc/config/i386/mm3dnow.h | 218 + gcc-4.9/gcc/config/i386/mmintrin.h | 942 + gcc-4.9/gcc/config/i386/mmx.md | 1613 + gcc-4.9/gcc/config/i386/msformat-c.c | 195 + gcc-4.9/gcc/config/i386/netbsd-elf.h | 121 + gcc-4.9/gcc/config/i386/netbsd64.h | 69 + gcc-4.9/gcc/config/i386/nmmintrin.h | 33 + gcc-4.9/gcc/config/i386/nto.h | 105 + gcc-4.9/gcc/config/i386/nto.opt | 32 + gcc-4.9/gcc/config/i386/openbsd.h | 101 + gcc-4.9/gcc/config/i386/openbsdelf.h | 119 + gcc-4.9/gcc/config/i386/pentium.md | 306 + gcc-4.9/gcc/config/i386/pmm_malloc.h | 57 + gcc-4.9/gcc/config/i386/pmmintrin.h | 132 + gcc-4.9/gcc/config/i386/popcntintrin.h | 53 + gcc-4.9/gcc/config/i386/ppro.md | 758 + gcc-4.9/gcc/config/i386/predicates.md | 1424 + gcc-4.9/gcc/config/i386/prfchwintrin.h | 37 + gcc-4.9/gcc/config/i386/rdos.h | 39 + gcc-4.9/gcc/config/i386/rdos64.h | 24 + gcc-4.9/gcc/config/i386/rdseedintrin.h | 66 + gcc-4.9/gcc/config/i386/rtemself.h | 40 + gcc-4.9/gcc/config/i386/rtmintrin.h | 84 + gcc-4.9/gcc/config/i386/shaintrin.h | 98 + gcc-4.9/gcc/config/i386/slm.md | 758 + gcc-4.9/gcc/config/i386/smmintrin.h | 862 + gcc-4.9/gcc/config/i386/sol2-9.h | 23 + gcc-4.9/gcc/config/i386/sol2-bi.h | 109 + gcc-4.9/gcc/config/i386/sol2.h | 189 + gcc-4.9/gcc/config/i386/sse.md | 15507 ++++++ gcc-4.9/gcc/config/i386/ssemath.h | 28 + gcc-4.9/gcc/config/i386/stringop.def | 37 + gcc-4.9/gcc/config/i386/stringop.opt | 31 + gcc-4.9/gcc/config/i386/subst.md | 198 + gcc-4.9/gcc/config/i386/sync.md | 606 + gcc-4.9/gcc/config/i386/sysv4.h | 72 + gcc-4.9/gcc/config/i386/t-cygming | 48 + gcc-4.9/gcc/config/i386/t-cygwin-w64 | 3 + gcc-4.9/gcc/config/i386/t-darwin | 2 + gcc-4.9/gcc/config/i386/t-darwin64 | 2 + gcc-4.9/gcc/config/i386/t-gmm_malloc | 6 + gcc-4.9/gcc/config/i386/t-gnu | 1 + gcc-4.9/gcc/config/i386/t-i386 | 31 + gcc-4.9/gcc/config/i386/t-interix | 30 + gcc-4.9/gcc/config/i386/t-kfreebsd | 5 + gcc-4.9/gcc/config/i386/t-linux | 1 + gcc-4.9/gcc/config/i386/t-linux64 | 38 + gcc-4.9/gcc/config/i386/t-mingw-w32 | 3 + gcc-4.9/gcc/config/i386/t-mingw-w64 | 3 + gcc-4.9/gcc/config/i386/t-openbsd | 4 + gcc-4.9/gcc/config/i386/t-pmm_malloc | 6 + gcc-4.9/gcc/config/i386/t-rtems | 26 + gcc-4.9/gcc/config/i386/t-sol2-64 | 21 + gcc-4.9/gcc/config/i386/t-vxworks | 8 + gcc-4.9/gcc/config/i386/t-vxworksae | 5 + gcc-4.9/gcc/config/i386/tbmintrin.h | 180 + 
gcc-4.9/gcc/config/i386/tmmintrin.h | 249 + gcc-4.9/gcc/config/i386/unix.h | 80 + gcc-4.9/gcc/config/i386/vx-common.h | 33 + gcc-4.9/gcc/config/i386/vxworks.h | 73 + gcc-4.9/gcc/config/i386/vxworksae.h | 35 + gcc-4.9/gcc/config/i386/winnt-cxx.c | 184 + gcc-4.9/gcc/config/i386/winnt-stubs.c | 51 + gcc-4.9/gcc/config/i386/winnt.c | 1304 + gcc-4.9/gcc/config/i386/wmmintrin.h | 132 + gcc-4.9/gcc/config/i386/x-cygwin | 4 + gcc-4.9/gcc/config/i386/x-darwin | 3 + gcc-4.9/gcc/config/i386/x-i386 | 3 + gcc-4.9/gcc/config/i386/x-mingw32 | 31 + gcc-4.9/gcc/config/i386/x86-64.h | 108 + gcc-4.9/gcc/config/i386/x86-tune.def | 525 + gcc-4.9/gcc/config/i386/x86intrin.h | 78 + gcc-4.9/gcc/config/i386/xm-cygwin.h | 21 + gcc-4.9/gcc/config/i386/xm-djgpp.h | 83 + gcc-4.9/gcc/config/i386/xm-mingw32.h | 40 + gcc-4.9/gcc/config/i386/xmmintrin.h | 1265 + gcc-4.9/gcc/config/i386/xopintrin.h | 844 + gcc-4.9/gcc/config/i386/xsaveintrin.h | 72 + gcc-4.9/gcc/config/i386/xsaveoptintrin.h | 58 + gcc-4.9/gcc/config/i386/xtestintrin.h | 51 + gcc-4.9/gcc/config/ia64/constraints.md | 154 + gcc-4.9/gcc/config/ia64/div.md | 1221 + gcc-4.9/gcc/config/ia64/elf.h | 68 + gcc-4.9/gcc/config/ia64/freebsd.h | 52 + gcc-4.9/gcc/config/ia64/hpux-unix2003.h | 8 + gcc-4.9/gcc/config/ia64/hpux.h | 234 + gcc-4.9/gcc/config/ia64/ia64-c.c | 191 + gcc-4.9/gcc/config/ia64/ia64-modes.def | 86 + gcc-4.9/gcc/config/ia64/ia64-opts.h | 34 + gcc-4.9/gcc/config/ia64/ia64-protos.h | 104 + gcc-4.9/gcc/config/ia64/ia64.c | 11762 +++++ gcc-4.9/gcc/config/ia64/ia64.h | 1724 + gcc-4.9/gcc/config/ia64/ia64.md | 5242 +++ gcc-4.9/gcc/config/ia64/ia64.opt | 198 + gcc-4.9/gcc/config/ia64/ia64intrin.h | 2 + gcc-4.9/gcc/config/ia64/ilp32.opt | 7 + gcc-4.9/gcc/config/ia64/itanium2.md | 1867 + gcc-4.9/gcc/config/ia64/linux.h | 90 + gcc-4.9/gcc/config/ia64/predicates.md | 636 + gcc-4.9/gcc/config/ia64/sync.md | 330 + gcc-4.9/gcc/config/ia64/sysv4.h | 144 + gcc-4.9/gcc/config/ia64/t-hpux | 28 + gcc-4.9/gcc/config/ia64/t-ia64 | 28 + gcc-4.9/gcc/config/ia64/t-linux | 1 + gcc-4.9/gcc/config/ia64/vect.md | 1569 + gcc-4.9/gcc/config/ia64/vms.h | 156 + gcc-4.9/gcc/config/ia64/vms.opt | 29 + gcc-4.9/gcc/config/initfini-array.h | 40 + gcc-4.9/gcc/config/iq2000/abi | 239 + gcc-4.9/gcc/config/iq2000/constraints.md | 79 + gcc-4.9/gcc/config/iq2000/iq2000-opts.h | 32 + gcc-4.9/gcc/config/iq2000/iq2000-protos.h | 48 + gcc-4.9/gcc/config/iq2000/iq2000.c | 3474 ++ gcc-4.9/gcc/config/iq2000/iq2000.h | 871 + gcc-4.9/gcc/config/iq2000/iq2000.md | 2179 + gcc-4.9/gcc/config/iq2000/iq2000.opt | 74 + gcc-4.9/gcc/config/iq2000/predicates.md | 240 + gcc-4.9/gcc/config/kfreebsd-gnu.h | 35 + gcc-4.9/gcc/config/knetbsd-gnu.h | 35 + gcc-4.9/gcc/config/kopensolaris-gnu.h | 34 + gcc-4.9/gcc/config/linux-android.h | 59 + gcc-4.9/gcc/config/linux-android.opt | 30 + gcc-4.9/gcc/config/linux-protos.h | 22 + gcc-4.9/gcc/config/linux.c | 46 + gcc-4.9/gcc/config/linux.h | 129 + gcc-4.9/gcc/config/linux.opt | 32 + gcc-4.9/gcc/config/lm32/constraints.md | 57 + gcc-4.9/gcc/config/lm32/lm32-protos.h | 38 + gcc-4.9/gcc/config/lm32/lm32.c | 1227 + gcc-4.9/gcc/config/lm32/lm32.h | 545 + gcc-4.9/gcc/config/lm32/lm32.md | 1015 + gcc-4.9/gcc/config/lm32/lm32.opt | 40 + gcc-4.9/gcc/config/lm32/predicates.md | 72 + gcc-4.9/gcc/config/lm32/rtems.h | 32 + gcc-4.9/gcc/config/lm32/t-lm32 | 2 + gcc-4.9/gcc/config/lm32/t-rtems | 21 + gcc-4.9/gcc/config/lm32/uclinux-elf.h | 78 + gcc-4.9/gcc/config/lynx.h | 176 + gcc-4.9/gcc/config/lynx.opt | 31 + gcc-4.9/gcc/config/m32c/addsub.md | 259 + gcc-4.9/gcc/config/m32c/bitops.md | 
421 + gcc-4.9/gcc/config/m32c/blkmov.md | 241 + gcc-4.9/gcc/config/m32c/cond.md | 309 + gcc-4.9/gcc/config/m32c/constraints.md | 225 + gcc-4.9/gcc/config/m32c/jump.md | 134 + gcc-4.9/gcc/config/m32c/m32c-modes.def | 28 + gcc-4.9/gcc/config/m32c/m32c-pragma.c | 129 + gcc-4.9/gcc/config/m32c/m32c-protos.h | 84 + gcc-4.9/gcc/config/m32c/m32c.abi | 131 + gcc-4.9/gcc/config/m32c/m32c.c | 4544 ++ gcc-4.9/gcc/config/m32c/m32c.h | 647 + gcc-4.9/gcc/config/m32c/m32c.md | 79 + gcc-4.9/gcc/config/m32c/m32c.opt | 43 + gcc-4.9/gcc/config/m32c/minmax.md | 57 + gcc-4.9/gcc/config/m32c/mov.md | 490 + gcc-4.9/gcc/config/m32c/muldiv.md | 287 + gcc-4.9/gcc/config/m32c/predicates.md | 294 + gcc-4.9/gcc/config/m32c/prologue.md | 201 + gcc-4.9/gcc/config/m32c/rtems.h | 33 + gcc-4.9/gcc/config/m32c/shift.md | 351 + gcc-4.9/gcc/config/m32c/t-m32c | 42 + gcc-4.9/gcc/config/m32r/constraints.md | 147 + gcc-4.9/gcc/config/m32r/linux.h | 91 + gcc-4.9/gcc/config/m32r/little.h | 20 + gcc-4.9/gcc/config/m32r/m32r-opts.h | 108 + gcc-4.9/gcc/config/m32r/m32r-protos.h | 62 + gcc-4.9/gcc/config/m32r/m32r.c | 2921 ++ gcc-4.9/gcc/config/m32r/m32r.h | 1051 + gcc-4.9/gcc/config/m32r/m32r.md | 2276 + gcc-4.9/gcc/config/m32r/m32r.opt | 117 + gcc-4.9/gcc/config/m32r/predicates.md | 440 + gcc-4.9/gcc/config/m32r/rtems.h | 33 + gcc-4.9/gcc/config/m32r/t-linux | 20 + gcc-4.9/gcc/config/m32r/t-m32r | 31 + gcc-4.9/gcc/config/m68k/cf.md | 2250 + gcc-4.9/gcc/config/m68k/constraints.md | 165 + gcc-4.9/gcc/config/m68k/genopt.sh | 92 + gcc-4.9/gcc/config/m68k/ieee.opt | 24 + gcc-4.9/gcc/config/m68k/linux.h | 241 + gcc-4.9/gcc/config/m68k/m68020-elf.h | 30 + gcc-4.9/gcc/config/m68k/m68k-devices.def | 195 + gcc-4.9/gcc/config/m68k/m68k-isas.def | 41 + gcc-4.9/gcc/config/m68k/m68k-microarchs.def | 44 + gcc-4.9/gcc/config/m68k/m68k-modes.def | 25 + gcc-4.9/gcc/config/m68k/m68k-none.h | 19 + gcc-4.9/gcc/config/m68k/m68k-opts.h | 44 + gcc-4.9/gcc/config/m68k/m68k-protos.h | 101 + gcc-4.9/gcc/config/m68k/m68k-tables.opt | 445 + gcc-4.9/gcc/config/m68k/m68k.c | 6530 +++ gcc-4.9/gcc/config/m68k/m68k.h | 980 + gcc-4.9/gcc/config/m68k/m68k.md | 7585 +++ gcc-4.9/gcc/config/m68k/m68k.opt | 195 + gcc-4.9/gcc/config/m68k/m68kelf.h | 148 + gcc-4.9/gcc/config/m68k/m68kemb.h | 52 + gcc-4.9/gcc/config/m68k/math-68881.h | 529 + gcc-4.9/gcc/config/m68k/netbsd-elf.h | 297 + gcc-4.9/gcc/config/m68k/openbsd.h | 90 + gcc-4.9/gcc/config/m68k/predicates.md | 246 + gcc-4.9/gcc/config/m68k/print-sysroot-suffix.sh | 81 + gcc-4.9/gcc/config/m68k/rtemself.h | 33 + gcc-4.9/gcc/config/m68k/sync.md | 80 + gcc-4.9/gcc/config/m68k/t-cf | 7 + gcc-4.9/gcc/config/m68k/t-linux | 31 + gcc-4.9/gcc/config/m68k/t-m68k | 4 + gcc-4.9/gcc/config/m68k/t-m68kbare | 4 + gcc-4.9/gcc/config/m68k/t-mlibs | 94 + gcc-4.9/gcc/config/m68k/t-openbsd | 4 + gcc-4.9/gcc/config/m68k/t-opts | 5 + gcc-4.9/gcc/config/m68k/t-rtems | 9 + gcc-4.9/gcc/config/m68k/t-uclinux | 33 + gcc-4.9/gcc/config/m68k/uclinux.h | 69 + gcc-4.9/gcc/config/m68k/uclinux.opt | 35 + gcc-4.9/gcc/config/mcore/constraints.md | 111 + gcc-4.9/gcc/config/mcore/mcore-elf.h | 125 + gcc-4.9/gcc/config/mcore/mcore-protos.h | 68 + gcc-4.9/gcc/config/mcore/mcore.c | 3184 ++ gcc-4.9/gcc/config/mcore/mcore.h | 839 + gcc-4.9/gcc/config/mcore/mcore.md | 3057 ++ gcc-4.9/gcc/config/mcore/mcore.opt | 75 + gcc-4.9/gcc/config/mcore/predicates.md | 338 + gcc-4.9/gcc/config/mcore/t-mcore | 29 + gcc-4.9/gcc/config/mep/constraints.md | 162 + gcc-4.9/gcc/config/mep/default.h | 10 + gcc-4.9/gcc/config/mep/intrinsics.h | 620 + 
gcc-4.9/gcc/config/mep/intrinsics.md | 21568 +++++++++ gcc-4.9/gcc/config/mep/ivc2-template.h | 9 + gcc-4.9/gcc/config/mep/mep-c5.cpu | 277 + gcc-4.9/gcc/config/mep/mep-core.cpu | 3080 ++ gcc-4.9/gcc/config/mep/mep-default.cpu | 25 + gcc-4.9/gcc/config/mep/mep-ext-cop.cpu | 23 + gcc-4.9/gcc/config/mep/mep-intrin.h | 8933 ++++ gcc-4.9/gcc/config/mep/mep-ivc2.cpu | 9775 ++++ gcc-4.9/gcc/config/mep/mep-pragma.c | 404 + gcc-4.9/gcc/config/mep/mep-protos.h | 128 + gcc-4.9/gcc/config/mep/mep.c | 7303 +++ gcc-4.9/gcc/config/mep/mep.cpu | 21 + gcc-4.9/gcc/config/mep/mep.h | 793 + gcc-4.9/gcc/config/mep/mep.md | 2256 + gcc-4.9/gcc/config/mep/mep.opt | 164 + gcc-4.9/gcc/config/mep/predicates.md | 184 + gcc-4.9/gcc/config/mep/t-mep | 68 + gcc-4.9/gcc/config/microblaze/constraints.md | 77 + gcc-4.9/gcc/config/microblaze/linux.h | 45 + gcc-4.9/gcc/config/microblaze/microblaze-c.c | 105 + gcc-4.9/gcc/config/microblaze/microblaze-protos.h | 62 + gcc-4.9/gcc/config/microblaze/microblaze.c | 3594 ++ gcc-4.9/gcc/config/microblaze/microblaze.h | 937 + gcc-4.9/gcc/config/microblaze/microblaze.md | 2264 + gcc-4.9/gcc/config/microblaze/microblaze.opt | 127 + gcc-4.9/gcc/config/microblaze/predicates.md | 129 + gcc-4.9/gcc/config/microblaze/rtems.h | 25 + gcc-4.9/gcc/config/microblaze/sync.md | 43 + gcc-4.9/gcc/config/microblaze/t-microblaze | 12 + gcc-4.9/gcc/config/microblaze/t-microblaze-linux | 3 + gcc-4.9/gcc/config/microblaze/t-rtems | 1 + gcc-4.9/gcc/config/mips/10000.md | 251 + gcc-4.9/gcc/config/mips/20kc.md | 284 + gcc-4.9/gcc/config/mips/24k.md | 545 + gcc-4.9/gcc/config/mips/3000.md | 71 + gcc-4.9/gcc/config/mips/4000.md | 32 + gcc-4.9/gcc/config/mips/4100.md | 51 + gcc-4.9/gcc/config/mips/4130.md | 153 + gcc-4.9/gcc/config/mips/4300.md | 85 + gcc-4.9/gcc/config/mips/4600.md | 130 + gcc-4.9/gcc/config/mips/4k.md | 153 + gcc-4.9/gcc/config/mips/5000.md | 80 + gcc-4.9/gcc/config/mips/5400.md | 184 + gcc-4.9/gcc/config/mips/5500.md | 227 + gcc-4.9/gcc/config/mips/5k.md | 229 + gcc-4.9/gcc/config/mips/6000.md | 56 + gcc-4.9/gcc/config/mips/7000.md | 214 + gcc-4.9/gcc/config/mips/74k.md | 501 + gcc-4.9/gcc/config/mips/9000.md | 151 + gcc-4.9/gcc/config/mips/constraints.md | 363 + gcc-4.9/gcc/config/mips/driver-native.c | 89 + gcc-4.9/gcc/config/mips/elf.h | 50 + gcc-4.9/gcc/config/mips/elfoabi.h | 39 + gcc-4.9/gcc/config/mips/elforion.h | 20 + gcc-4.9/gcc/config/mips/generic.md | 121 + gcc-4.9/gcc/config/mips/genopt.sh | 123 + gcc-4.9/gcc/config/mips/gnu-user.h | 139 + gcc-4.9/gcc/config/mips/gnu-user64.h | 52 + gcc-4.9/gcc/config/mips/linux-common.h | 64 + gcc-4.9/gcc/config/mips/linux.h | 25 + gcc-4.9/gcc/config/mips/linux64.h | 44 + gcc-4.9/gcc/config/mips/loongson.h | 690 + gcc-4.9/gcc/config/mips/loongson.md | 939 + gcc-4.9/gcc/config/mips/loongson2ef.md | 252 + gcc-4.9/gcc/config/mips/loongson3a.md | 137 + gcc-4.9/gcc/config/mips/micromips.md | 138 + gcc-4.9/gcc/config/mips/mips-cpus.def | 154 + gcc-4.9/gcc/config/mips/mips-dsp.md | 1205 + gcc-4.9/gcc/config/mips/mips-dspr2.md | 632 + gcc-4.9/gcc/config/mips/mips-fixed.md | 156 + gcc-4.9/gcc/config/mips/mips-ftypes.def | 129 + gcc-4.9/gcc/config/mips/mips-modes.def | 48 + gcc-4.9/gcc/config/mips/mips-opts.h | 50 + gcc-4.9/gcc/config/mips/mips-protos.h | 362 + gcc-4.9/gcc/config/mips/mips-ps-3d.md | 764 + gcc-4.9/gcc/config/mips/mips-tables.opt | 644 + gcc-4.9/gcc/config/mips/mips.c | 19139 ++++++++ gcc-4.9/gcc/config/mips/mips.h | 3005 ++ gcc-4.9/gcc/config/mips/mips.md | 7190 +++ gcc-4.9/gcc/config/mips/mips.opt | 404 + 
gcc-4.9/gcc/config/mips/mti-elf.h | 42 + gcc-4.9/gcc/config/mips/mti-linux.h | 46 + gcc-4.9/gcc/config/mips/n32-elf.h | 35 + gcc-4.9/gcc/config/mips/netbsd.h | 179 + gcc-4.9/gcc/config/mips/octeon.md | 136 + gcc-4.9/gcc/config/mips/predicates.md | 494 + gcc-4.9/gcc/config/mips/r3900.h | 39 + gcc-4.9/gcc/config/mips/rtems.h | 34 + gcc-4.9/gcc/config/mips/sb1.md | 573 + gcc-4.9/gcc/config/mips/sde.h | 103 + gcc-4.9/gcc/config/mips/sde.opt | 28 + gcc-4.9/gcc/config/mips/sdemtk.h | 102 + gcc-4.9/gcc/config/mips/sr71k.md | 337 + gcc-4.9/gcc/config/mips/st.h | 30 + gcc-4.9/gcc/config/mips/sync.md | 716 + gcc-4.9/gcc/config/mips/t-elf | 23 + gcc-4.9/gcc/config/mips/t-irix6 | 4 + gcc-4.9/gcc/config/mips/t-isa3264 | 33 + gcc-4.9/gcc/config/mips/t-linux64 | 26 + gcc-4.9/gcc/config/mips/t-mips | 22 + gcc-4.9/gcc/config/mips/t-mti-elf | 50 + gcc-4.9/gcc/config/mips/t-mti-linux | 50 + gcc-4.9/gcc/config/mips/t-r3900 | 23 + gcc-4.9/gcc/config/mips/t-rtems | 34 + gcc-4.9/gcc/config/mips/t-sb1 | 62 + gcc-4.9/gcc/config/mips/t-sde | 37 + gcc-4.9/gcc/config/mips/t-sdemtk | 40 + gcc-4.9/gcc/config/mips/t-sr71k | 21 + gcc-4.9/gcc/config/mips/t-st | 30 + gcc-4.9/gcc/config/mips/t-vr | 106 + gcc-4.9/gcc/config/mips/t-vxworks | 35 + gcc-4.9/gcc/config/mips/vr.h | 58 + gcc-4.9/gcc/config/mips/vxworks.h | 78 + gcc-4.9/gcc/config/mips/x-native | 3 + gcc-4.9/gcc/config/mips/xlp.md | 213 + gcc-4.9/gcc/config/mips/xlr.md | 94 + gcc-4.9/gcc/config/mmix/constraints.md | 116 + gcc-4.9/gcc/config/mmix/mmix-modes.def | 49 + gcc-4.9/gcc/config/mmix/mmix-protos.h | 85 + gcc-4.9/gcc/config/mmix/mmix.c | 2799 ++ gcc-4.9/gcc/config/mmix/mmix.h | 831 + gcc-4.9/gcc/config/mmix/mmix.md | 1243 + gcc-4.9/gcc/config/mmix/mmix.opt | 99 + gcc-4.9/gcc/config/mmix/predicates.md | 174 + gcc-4.9/gcc/config/mmix/t-mmix | 20 + gcc-4.9/gcc/config/mn10300/constraints.md | 107 + gcc-4.9/gcc/config/mn10300/linux.h | 87 + gcc-4.9/gcc/config/mn10300/mn10300-modes.def | 23 + gcc-4.9/gcc/config/mn10300/mn10300-opts.h | 31 + gcc-4.9/gcc/config/mn10300/mn10300-protos.h | 48 + gcc-4.9/gcc/config/mn10300/mn10300.c | 3426 ++ gcc-4.9/gcc/config/mn10300/mn10300.h | 732 + gcc-4.9/gcc/config/mn10300/mn10300.md | 2217 + gcc-4.9/gcc/config/mn10300/mn10300.opt | 67 + gcc-4.9/gcc/config/mn10300/predicates.md | 73 + gcc-4.9/gcc/config/mn10300/t-mn10300 | 20 + gcc-4.9/gcc/config/moxie/constraints.md | 56 + gcc-4.9/gcc/config/moxie/moxie-protos.h | 24 + gcc-4.9/gcc/config/moxie/moxie.c | 614 + gcc-4.9/gcc/config/moxie/moxie.h | 491 + gcc-4.9/gcc/config/moxie/moxie.md | 450 + gcc-4.9/gcc/config/moxie/moxie.opt | 31 + gcc-4.9/gcc/config/moxie/predicates.md | 55 + gcc-4.9/gcc/config/moxie/rtems.h | 35 + gcc-4.9/gcc/config/moxie/t-moxie | 23 + gcc-4.9/gcc/config/moxie/uclinux.h | 42 + gcc-4.9/gcc/config/msp430/README.txt | 7 + gcc-4.9/gcc/config/msp430/constraints.md | 85 + gcc-4.9/gcc/config/msp430/msp430-c.c | 36 + gcc-4.9/gcc/config/msp430/msp430-modes.def | 3 + gcc-4.9/gcc/config/msp430/msp430-protos.h | 49 + gcc-4.9/gcc/config/msp430/msp430.c | 2338 + gcc-4.9/gcc/config/msp430/msp430.h | 411 + gcc-4.9/gcc/config/msp430/msp430.md | 1370 + gcc-4.9/gcc/config/msp430/msp430.opt | 38 + gcc-4.9/gcc/config/msp430/predicates.md | 80 + gcc-4.9/gcc/config/msp430/t-msp430 | 257 + gcc-4.9/gcc/config/nds32/constants.md | 46 + gcc-4.9/gcc/config/nds32/constraints.md | 254 + gcc-4.9/gcc/config/nds32/iterators.md | 55 + gcc-4.9/gcc/config/nds32/nds32-doubleword.md | 251 + gcc-4.9/gcc/config/nds32/nds32-intrinsic.md | 97 + gcc-4.9/gcc/config/nds32/nds32-modes.def | 21 + 
gcc-4.9/gcc/config/nds32/nds32-multiple.md | 410 + gcc-4.9/gcc/config/nds32/nds32-opts.h | 35 + gcc-4.9/gcc/config/nds32/nds32-peephole2.md | 25 + gcc-4.9/gcc/config/nds32/nds32-protos.h | 128 + gcc-4.9/gcc/config/nds32/nds32.c | 5724 +++ gcc-4.9/gcc/config/nds32/nds32.h | 993 + gcc-4.9/gcc/config/nds32/nds32.md | 2221 + gcc-4.9/gcc/config/nds32/nds32.opt | 102 + gcc-4.9/gcc/config/nds32/nds32_intrinsic.h | 37 + gcc-4.9/gcc/config/nds32/pipelines.md | 29 + gcc-4.9/gcc/config/nds32/predicates.md | 92 + gcc-4.9/gcc/config/nds32/t-mlibs | 38 + gcc-4.9/gcc/config/netbsd-elf.h | 86 + gcc-4.9/gcc/config/netbsd-elf.opt | 32 + gcc-4.9/gcc/config/netbsd.h | 177 + gcc-4.9/gcc/config/netbsd.opt | 35 + gcc-4.9/gcc/config/newlib-stdint.h | 64 + gcc-4.9/gcc/config/nios2/constraints.md | 89 + gcc-4.9/gcc/config/nios2/elf.h | 52 + gcc-4.9/gcc/config/nios2/elf.opt | 38 + gcc-4.9/gcc/config/nios2/linux.h | 38 + gcc-4.9/gcc/config/nios2/nios2-opts.h | 69 + gcc-4.9/gcc/config/nios2/nios2-protos.h | 61 + gcc-4.9/gcc/config/nios2/nios2.c | 3312 ++ gcc-4.9/gcc/config/nios2/nios2.h | 499 + gcc-4.9/gcc/config/nios2/nios2.md | 1030 + gcc-4.9/gcc/config/nios2/nios2.opt | 531 + gcc-4.9/gcc/config/nios2/predicates.md | 85 + gcc-4.9/gcc/config/nios2/t-nios2 | 27 + gcc-4.9/gcc/config/openbsd-libpthread.h | 22 + gcc-4.9/gcc/config/openbsd-oldgas.h | 22 + gcc-4.9/gcc/config/openbsd-stdint.h | 34 + gcc-4.9/gcc/config/openbsd.h | 289 + gcc-4.9/gcc/config/openbsd.opt | 35 + gcc-4.9/gcc/config/pa/constraints.md | 135 + gcc-4.9/gcc/config/pa/elf.h | 92 + gcc-4.9/gcc/config/pa/pa-64.h | 100 + gcc-4.9/gcc/config/pa/pa-hpux.h | 119 + gcc-4.9/gcc/config/pa/pa-hpux.opt | 41 + gcc-4.9/gcc/config/pa/pa-hpux10.h | 143 + gcc-4.9/gcc/config/pa/pa-hpux10.opt | 22 + gcc-4.9/gcc/config/pa/pa-hpux1010.h | 27 + gcc-4.9/gcc/config/pa/pa-hpux1010.opt | 23 + gcc-4.9/gcc/config/pa/pa-hpux11.h | 189 + gcc-4.9/gcc/config/pa/pa-hpux1111.h | 27 + gcc-4.9/gcc/config/pa/pa-hpux1111.opt | 23 + gcc-4.9/gcc/config/pa/pa-hpux1131.h | 29 + gcc-4.9/gcc/config/pa/pa-hpux1131.opt | 23 + gcc-4.9/gcc/config/pa/pa-linux.h | 135 + gcc-4.9/gcc/config/pa/pa-modes.def | 32 + gcc-4.9/gcc/config/pa/pa-openbsd.h | 154 + gcc-4.9/gcc/config/pa/pa-opts.h | 35 + gcc-4.9/gcc/config/pa/pa-protos.h | 121 + gcc-4.9/gcc/config/pa/pa.c | 10581 +++++ gcc-4.9/gcc/config/pa/pa.h | 1310 + gcc-4.9/gcc/config/pa/pa.md | 9855 ++++ gcc-4.9/gcc/config/pa/pa.opt | 140 + gcc-4.9/gcc/config/pa/pa32-linux.h | 64 + gcc-4.9/gcc/config/pa/pa32-openbsd.h | 22 + gcc-4.9/gcc/config/pa/pa32-regs.h | 359 + gcc-4.9/gcc/config/pa/pa64-hpux.h | 454 + gcc-4.9/gcc/config/pa/pa64-hpux.opt | 27 + gcc-4.9/gcc/config/pa/pa64-linux.h | 64 + gcc-4.9/gcc/config/pa/pa64-regs.h | 280 + gcc-4.9/gcc/config/pa/pa64-start.h | 8 + gcc-4.9/gcc/config/pa/predicates.md | 657 + gcc-4.9/gcc/config/pa/som.h | 345 + gcc-4.9/gcc/config/pa/t-dce-thr | 2 + gcc-4.9/gcc/config/pa/t-linux | 1 + gcc-4.9/gcc/config/pdp11/constraints.md | 81 + gcc-4.9/gcc/config/pdp11/pdp11-modes.def | 26 + gcc-4.9/gcc/config/pdp11/pdp11-protos.h | 49 + gcc-4.9/gcc/config/pdp11/pdp11.c | 1903 + gcc-4.9/gcc/config/pdp11/pdp11.h | 672 + gcc-4.9/gcc/config/pdp11/pdp11.md | 1436 + gcc-4.9/gcc/config/pdp11/pdp11.opt | 87 + gcc-4.9/gcc/config/pdp11/predicates.md | 54 + gcc-4.9/gcc/config/pdp11/t-pdp11 | 27 + gcc-4.9/gcc/config/picochip/constraints.md | 64 + gcc-4.9/gcc/config/picochip/dfa_space.md | 43 + gcc-4.9/gcc/config/picochip/dfa_speed.md | 123 + gcc-4.9/gcc/config/picochip/picochip-protos.h | 128 + gcc-4.9/gcc/config/picochip/picochip.c | 4703 ++ 
gcc-4.9/gcc/config/picochip/picochip.h | 661 + gcc-4.9/gcc/config/picochip/picochip.md | 2623 ++ gcc-4.9/gcc/config/picochip/picochip.opt | 46 + gcc-4.9/gcc/config/picochip/predicates.md | 72 + gcc-4.9/gcc/config/picochip/t-picochip | 24 + gcc-4.9/gcc/config/print-sysroot-suffix.sh | 145 + gcc-4.9/gcc/config/rl78/constraints.md | 386 + gcc-4.9/gcc/config/rl78/predicates.md | 71 + gcc-4.9/gcc/config/rl78/rl78-c.c | 34 + gcc-4.9/gcc/config/rl78/rl78-expand.md | 306 + gcc-4.9/gcc/config/rl78/rl78-opts.h | 30 + gcc-4.9/gcc/config/rl78/rl78-protos.h | 47 + gcc-4.9/gcc/config/rl78/rl78-real.md | 561 + gcc-4.9/gcc/config/rl78/rl78-virt.md | 416 + gcc-4.9/gcc/config/rl78/rl78.c | 3748 ++ gcc-4.9/gcc/config/rl78/rl78.h | 473 + gcc-4.9/gcc/config/rl78/rl78.md | 443 + gcc-4.9/gcc/config/rl78/rl78.opt | 55 + gcc-4.9/gcc/config/rl78/t-rl78 | 27 + gcc-4.9/gcc/config/rpath.opt | 28 + gcc-4.9/gcc/config/rs6000/40x.md | 120 + gcc-4.9/gcc/config/rs6000/440.md | 133 + gcc-4.9/gcc/config/rs6000/476.h | 32 + gcc-4.9/gcc/config/rs6000/476.md | 141 + gcc-4.9/gcc/config/rs6000/476.opt | 24 + gcc-4.9/gcc/config/rs6000/601.md | 136 + gcc-4.9/gcc/config/rs6000/603.md | 143 + gcc-4.9/gcc/config/rs6000/6xx.md | 275 + gcc-4.9/gcc/config/rs6000/7450.md | 185 + gcc-4.9/gcc/config/rs6000/750cl.h | 30 + gcc-4.9/gcc/config/rs6000/7xx.md | 184 + gcc-4.9/gcc/config/rs6000/8540.md | 250 + gcc-4.9/gcc/config/rs6000/a2.md | 134 + gcc-4.9/gcc/config/rs6000/aix-stdint.h | 51 + gcc-4.9/gcc/config/rs6000/aix.h | 225 + gcc-4.9/gcc/config/rs6000/aix43.h | 164 + gcc-4.9/gcc/config/rs6000/aix51.h | 168 + gcc-4.9/gcc/config/rs6000/aix52.h | 181 + gcc-4.9/gcc/config/rs6000/aix53.h | 181 + gcc-4.9/gcc/config/rs6000/aix61.h | 214 + gcc-4.9/gcc/config/rs6000/aix64.opt | 55 + gcc-4.9/gcc/config/rs6000/altivec.h | 536 + gcc-4.9/gcc/config/rs6000/altivec.md | 3324 ++ gcc-4.9/gcc/config/rs6000/biarch64.h | 26 + gcc-4.9/gcc/config/rs6000/cell.md | 399 + gcc-4.9/gcc/config/rs6000/constraints.md | 242 + gcc-4.9/gcc/config/rs6000/crypto.md | 101 + gcc-4.9/gcc/config/rs6000/darwin.h | 426 + gcc-4.9/gcc/config/rs6000/darwin.md | 480 + gcc-4.9/gcc/config/rs6000/darwin.opt | 42 + gcc-4.9/gcc/config/rs6000/darwin64.h | 32 + gcc-4.9/gcc/config/rs6000/darwin7.h | 32 + gcc-4.9/gcc/config/rs6000/darwin8.h | 31 + gcc-4.9/gcc/config/rs6000/default64.h | 27 + gcc-4.9/gcc/config/rs6000/dfp.md | 324 + gcc-4.9/gcc/config/rs6000/driver-rs6000.c | 528 + gcc-4.9/gcc/config/rs6000/e300c2c3.md | 189 + gcc-4.9/gcc/config/rs6000/e500.h | 54 + gcc-4.9/gcc/config/rs6000/e500mc.md | 200 + gcc-4.9/gcc/config/rs6000/e500mc64.md | 191 + gcc-4.9/gcc/config/rs6000/e5500.md | 176 + gcc-4.9/gcc/config/rs6000/e6500.md | 213 + gcc-4.9/gcc/config/rs6000/eabi.h | 41 + gcc-4.9/gcc/config/rs6000/eabialtivec.h | 27 + gcc-4.9/gcc/config/rs6000/eabisim.h | 51 + gcc-4.9/gcc/config/rs6000/eabispe.h | 26 + gcc-4.9/gcc/config/rs6000/freebsd.h | 78 + gcc-4.9/gcc/config/rs6000/freebsd64.h | 435 + gcc-4.9/gcc/config/rs6000/genopt.sh | 64 + gcc-4.9/gcc/config/rs6000/host-darwin.c | 153 + gcc-4.9/gcc/config/rs6000/host-ppc64-darwin.c | 30 + gcc-4.9/gcc/config/rs6000/htm.md | 366 + gcc-4.9/gcc/config/rs6000/htmintrin.h | 131 + gcc-4.9/gcc/config/rs6000/htmxlintrin.h | 208 + gcc-4.9/gcc/config/rs6000/linux.h | 153 + gcc-4.9/gcc/config/rs6000/linux64.h | 583 + gcc-4.9/gcc/config/rs6000/linux64.opt | 42 + gcc-4.9/gcc/config/rs6000/linuxaltivec.h | 32 + gcc-4.9/gcc/config/rs6000/linuxspe.h | 32 + gcc-4.9/gcc/config/rs6000/lynx.h | 119 + gcc-4.9/gcc/config/rs6000/milli.exp | 7 + 
gcc-4.9/gcc/config/rs6000/mpc.md | 111 + gcc-4.9/gcc/config/rs6000/netbsd.h | 90 + gcc-4.9/gcc/config/rs6000/option-defaults.h | 64 + gcc-4.9/gcc/config/rs6000/paired.h | 75 + gcc-4.9/gcc/config/rs6000/paired.md | 488 + gcc-4.9/gcc/config/rs6000/power4.md | 408 + gcc-4.9/gcc/config/rs6000/power5.md | 308 + gcc-4.9/gcc/config/rs6000/power6.md | 578 + gcc-4.9/gcc/config/rs6000/power7.md | 333 + gcc-4.9/gcc/config/rs6000/power8.md | 373 + gcc-4.9/gcc/config/rs6000/ppc-asm.h | 381 + gcc-4.9/gcc/config/rs6000/ppu_intrinsics.h | 727 + gcc-4.9/gcc/config/rs6000/predicates.md | 1828 + gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 1845 + gcc-4.9/gcc/config/rs6000/rs6000-c.c | 4557 ++ gcc-4.9/gcc/config/rs6000/rs6000-cpus.def | 191 + gcc-4.9/gcc/config/rs6000/rs6000-linux.c | 38 + gcc-4.9/gcc/config/rs6000/rs6000-modes.def | 49 + gcc-4.9/gcc/config/rs6000/rs6000-opts.h | 160 + gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 224 + gcc-4.9/gcc/config/rs6000/rs6000-tables.opt | 190 + gcc-4.9/gcc/config/rs6000/rs6000.c | 32834 +++++++++++++ gcc-4.9/gcc/config/rs6000/rs6000.h | 2676 ++ gcc-4.9/gcc/config/rs6000/rs6000.md | 15700 ++++++ gcc-4.9/gcc/config/rs6000/rs6000.opt | 590 + gcc-4.9/gcc/config/rs6000/rs64.md | 154 + gcc-4.9/gcc/config/rs6000/rtems.h | 59 + gcc-4.9/gcc/config/rs6000/secureplt.h | 20 + gcc-4.9/gcc/config/rs6000/si2vmx.h | 2048 + gcc-4.9/gcc/config/rs6000/singlefp.h | 40 + gcc-4.9/gcc/config/rs6000/spe.h | 1107 + gcc-4.9/gcc/config/rs6000/spe.md | 3223 ++ gcc-4.9/gcc/config/rs6000/spu2vmx.h | 2415 + gcc-4.9/gcc/config/rs6000/sync.md | 411 + gcc-4.9/gcc/config/rs6000/sysv4.h | 951 + gcc-4.9/gcc/config/rs6000/sysv4.opt | 157 + gcc-4.9/gcc/config/rs6000/sysv4le.h | 37 + gcc-4.9/gcc/config/rs6000/t-aix43 | 39 + gcc-4.9/gcc/config/rs6000/t-aix52 | 26 + gcc-4.9/gcc/config/rs6000/t-darwin64 | 2 + gcc-4.9/gcc/config/rs6000/t-darwin8 | 3 + gcc-4.9/gcc/config/rs6000/t-fprules | 26 + gcc-4.9/gcc/config/rs6000/t-freebsd64 | 31 + gcc-4.9/gcc/config/rs6000/t-linux | 13 + gcc-4.9/gcc/config/rs6000/t-linux64 | 36 + gcc-4.9/gcc/config/rs6000/t-linux64bele | 7 + gcc-4.9/gcc/config/rs6000/t-linux64le | 3 + gcc-4.9/gcc/config/rs6000/t-linux64lebe | 7 + gcc-4.9/gcc/config/rs6000/t-lynx | 29 + gcc-4.9/gcc/config/rs6000/t-netbsd | 36 + gcc-4.9/gcc/config/rs6000/t-ppccomm | 23 + gcc-4.9/gcc/config/rs6000/t-ppcendian | 30 + gcc-4.9/gcc/config/rs6000/t-ppcgas | 32 + gcc-4.9/gcc/config/rs6000/t-ppcos | 8 + gcc-4.9/gcc/config/rs6000/t-rs6000 | 66 + gcc-4.9/gcc/config/rs6000/t-rtems | 88 + gcc-4.9/gcc/config/rs6000/t-spe | 73 + gcc-4.9/gcc/config/rs6000/t-vxworks | 25 + gcc-4.9/gcc/config/rs6000/t-vxworksae | 5 + gcc-4.9/gcc/config/rs6000/t-xilinx | 28 + gcc-4.9/gcc/config/rs6000/titan.md | 169 + gcc-4.9/gcc/config/rs6000/vec_types.h | 52 + gcc-4.9/gcc/config/rs6000/vector.md | 1217 + gcc-4.9/gcc/config/rs6000/vsx.md | 2015 + gcc-4.9/gcc/config/rs6000/vxworks.h | 133 + gcc-4.9/gcc/config/rs6000/x-aix | 6 + gcc-4.9/gcc/config/rs6000/x-darwin | 3 + gcc-4.9/gcc/config/rs6000/x-darwin64 | 3 + gcc-4.9/gcc/config/rs6000/x-linux-relax | 2 + gcc-4.9/gcc/config/rs6000/x-rs6000 | 3 + gcc-4.9/gcc/config/rs6000/xcoff.h | 362 + gcc-4.9/gcc/config/rs6000/xfpu.h | 26 + gcc-4.9/gcc/config/rs6000/xfpu.md | 140 + gcc-4.9/gcc/config/rs6000/xilinx.h | 47 + gcc-4.9/gcc/config/rs6000/xilinx.opt | 32 + gcc-4.9/gcc/config/rtems.h | 45 + gcc-4.9/gcc/config/rtems.opt | 35 + gcc-4.9/gcc/config/rx/constraints.md | 108 + gcc-4.9/gcc/config/rx/predicates.md | 307 + gcc-4.9/gcc/config/rx/rx-modes.def | 25 + gcc-4.9/gcc/config/rx/rx-opts.h | 31 + 
gcc-4.9/gcc/config/rx/rx-protos.h | 46 + gcc-4.9/gcc/config/rx/rx.c | 3443 ++ gcc-4.9/gcc/config/rx/rx.h | 665 + gcc-4.9/gcc/config/rx/rx.md | 2641 ++ gcc-4.9/gcc/config/rx/rx.opt | 141 + gcc-4.9/gcc/config/rx/t-rx | 34 + gcc-4.9/gcc/config/s390/2064.md | 135 + gcc-4.9/gcc/config/s390/2084.md | 309 + gcc-4.9/gcc/config/s390/2097.md | 764 + gcc-4.9/gcc/config/s390/2817.md | 314 + gcc-4.9/gcc/config/s390/2827.md | 624 + gcc-4.9/gcc/config/s390/constraints.md | 501 + gcc-4.9/gcc/config/s390/htmintrin.h | 57 + gcc-4.9/gcc/config/s390/htmxlintrin.h | 189 + gcc-4.9/gcc/config/s390/linux.h | 93 + gcc-4.9/gcc/config/s390/predicates.md | 470 + gcc-4.9/gcc/config/s390/s390-modes.def | 183 + gcc-4.9/gcc/config/s390/s390-opts.h | 41 + gcc-4.9/gcc/config/s390/s390-protos.h | 117 + gcc-4.9/gcc/config/s390/s390.c | 12237 +++++ gcc-4.9/gcc/config/s390/s390.h | 931 + gcc-4.9/gcc/config/s390/s390.md | 10220 ++++ gcc-4.9/gcc/config/s390/s390.opt | 167 + gcc-4.9/gcc/config/s390/s390intrin.h | 33 + gcc-4.9/gcc/config/s390/s390x.h | 27 + gcc-4.9/gcc/config/s390/t-linux64 | 11 + gcc-4.9/gcc/config/s390/tpf.h | 118 + gcc-4.9/gcc/config/s390/tpf.md | 33 + gcc-4.9/gcc/config/s390/tpf.opt | 27 + gcc-4.9/gcc/config/score/constraints.md | 93 + gcc-4.9/gcc/config/score/elf.h | 97 + gcc-4.9/gcc/config/score/predicates.md | 152 + gcc-4.9/gcc/config/score/score-conv.h | 78 + gcc-4.9/gcc/config/score/score-generic.md | 44 + gcc-4.9/gcc/config/score/score-modes.def | 24 + gcc-4.9/gcc/config/score/score-protos.h | 83 + gcc-4.9/gcc/config/score/score.c | 1939 + gcc-4.9/gcc/config/score/score.h | 867 + gcc-4.9/gcc/config/score/score.md | 1879 + gcc-4.9/gcc/config/score/score.opt | 57 + gcc-4.9/gcc/config/sh/constraints.md | 324 + gcc-4.9/gcc/config/sh/divcost-analysis | 88 + gcc-4.9/gcc/config/sh/divtab-sh4-300.c | 77 + gcc-4.9/gcc/config/sh/divtab-sh4.c | 85 + gcc-4.9/gcc/config/sh/divtab.c | 203 + gcc-4.9/gcc/config/sh/elf.h | 88 + gcc-4.9/gcc/config/sh/embed-elf.h | 36 + gcc-4.9/gcc/config/sh/iterators.md | 46 + gcc-4.9/gcc/config/sh/linux.h | 152 + gcc-4.9/gcc/config/sh/little.h | 21 + gcc-4.9/gcc/config/sh/netbsd-elf.h | 106 + gcc-4.9/gcc/config/sh/newlib.h | 29 + gcc-4.9/gcc/config/sh/predicates.md | 1201 + gcc-4.9/gcc/config/sh/rtems.h | 26 + gcc-4.9/gcc/config/sh/rtemself.h | 26 + gcc-4.9/gcc/config/sh/sh-c.c | 148 + gcc-4.9/gcc/config/sh/sh-mem.cc | 610 + gcc-4.9/gcc/config/sh/sh-modes.def | 34 + gcc-4.9/gcc/config/sh/sh-protos.h | 235 + gcc-4.9/gcc/config/sh/sh.c | 13504 ++++++ gcc-4.9/gcc/config/sh/sh.h | 2311 + gcc-4.9/gcc/config/sh/sh.md | 15960 +++++++ gcc-4.9/gcc/config/sh/sh.opt | 362 + gcc-4.9/gcc/config/sh/sh1.md | 85 + gcc-4.9/gcc/config/sh/sh4-300.md | 281 + gcc-4.9/gcc/config/sh/sh4.md | 454 + gcc-4.9/gcc/config/sh/sh4a.md | 234 + gcc-4.9/gcc/config/sh/sh64.h | 22 + gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 473 + gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 1509 + gcc-4.9/gcc/config/sh/shmedia.h | 30 + gcc-4.9/gcc/config/sh/shmedia.md | 94 + gcc-4.9/gcc/config/sh/sshmedia.h | 78 + gcc-4.9/gcc/config/sh/superh.h | 104 + gcc-4.9/gcc/config/sh/superh.opt | 10 + gcc-4.9/gcc/config/sh/sync.md | 1388 + gcc-4.9/gcc/config/sh/t-linux | 2 + gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 | 1 + gcc-4.9/gcc/config/sh/t-rtems | 7 + gcc-4.9/gcc/config/sh/t-sh | 101 + gcc-4.9/gcc/config/sh/t-sh64 | 22 + gcc-4.9/gcc/config/sh/t-vxworks | 6 + gcc-4.9/gcc/config/sh/ushmedia.h | 1091 + gcc-4.9/gcc/config/sh/vxworks.h | 66 + gcc-4.9/gcc/config/sol2-10.h | 24 + gcc-4.9/gcc/config/sol2-bi.h | 135 + gcc-4.9/gcc/config/sol2-c.c | 274 
+ gcc-4.9/gcc/config/sol2-cxx.c | 65 + gcc-4.9/gcc/config/sol2-protos.h | 33 + gcc-4.9/gcc/config/sol2-stubs.c | 33 + gcc-4.9/gcc/config/sol2.c | 300 + gcc-4.9/gcc/config/sol2.h | 300 + gcc-4.9/gcc/config/sol2.opt | 41 + gcc-4.9/gcc/config/sparc/biarch64.h | 23 + gcc-4.9/gcc/config/sparc/constraints.md | 201 + gcc-4.9/gcc/config/sparc/cypress.md | 50 + gcc-4.9/gcc/config/sparc/default-64.h | 22 + gcc-4.9/gcc/config/sparc/driver-sparc.c | 172 + gcc-4.9/gcc/config/sparc/freebsd.h | 149 + gcc-4.9/gcc/config/sparc/hypersparc.md | 82 + gcc-4.9/gcc/config/sparc/leon.md | 64 + gcc-4.9/gcc/config/sparc/linux.h | 169 + gcc-4.9/gcc/config/sparc/linux64.h | 284 + gcc-4.9/gcc/config/sparc/long-double-switch.opt | 27 + gcc-4.9/gcc/config/sparc/netbsd-elf.h | 226 + gcc-4.9/gcc/config/sparc/niagara.md | 118 + gcc-4.9/gcc/config/sparc/niagara2.md | 120 + gcc-4.9/gcc/config/sparc/niagara4.md | 114 + gcc-4.9/gcc/config/sparc/openbsd1-64.h | 23 + gcc-4.9/gcc/config/sparc/openbsd64.h | 78 + gcc-4.9/gcc/config/sparc/predicates.md | 535 + gcc-4.9/gcc/config/sparc/rtemself.h | 33 + gcc-4.9/gcc/config/sparc/sol2.h | 411 + gcc-4.9/gcc/config/sparc/sp-elf.h | 66 + gcc-4.9/gcc/config/sparc/sp64-elf.h | 76 + gcc-4.9/gcc/config/sparc/sparc-c.c | 62 + gcc-4.9/gcc/config/sparc/sparc-modes.def | 50 + gcc-4.9/gcc/config/sparc/sparc-opts.h | 60 + gcc-4.9/gcc/config/sparc/sparc-protos.h | 116 + gcc-4.9/gcc/config/sparc/sparc.c | 12704 +++++ gcc-4.9/gcc/config/sparc/sparc.h | 1782 + gcc-4.9/gcc/config/sparc/sparc.md | 9024 ++++ gcc-4.9/gcc/config/sparc/sparc.opt | 256 + gcc-4.9/gcc/config/sparc/sparclet.md | 43 + gcc-4.9/gcc/config/sparc/supersparc.md | 92 + gcc-4.9/gcc/config/sparc/sync.md | 286 + gcc-4.9/gcc/config/sparc/sysv4.h | 119 + gcc-4.9/gcc/config/sparc/t-elf | 21 + gcc-4.9/gcc/config/sparc/t-leon | 24 + gcc-4.9/gcc/config/sparc/t-leon3 | 22 + gcc-4.9/gcc/config/sparc/t-linux | 1 + gcc-4.9/gcc/config/sparc/t-linux64 | 29 + gcc-4.9/gcc/config/sparc/t-netbsd64 | 5 + gcc-4.9/gcc/config/sparc/t-rtems | 22 + gcc-4.9/gcc/config/sparc/t-rtems-64 | 22 + gcc-4.9/gcc/config/sparc/t-sol2-64 | 4 + gcc-4.9/gcc/config/sparc/t-sparc | 23 + gcc-4.9/gcc/config/sparc/t-vxworks | 5 + gcc-4.9/gcc/config/sparc/tso.h | 23 + gcc-4.9/gcc/config/sparc/ultra1_2.md | 301 + gcc-4.9/gcc/config/sparc/ultra3.md | 194 + gcc-4.9/gcc/config/sparc/visintrin.h | 709 + gcc-4.9/gcc/config/sparc/vxworks.h | 57 + gcc-4.9/gcc/config/sparc/x-sparc | 4 + gcc-4.9/gcc/config/spu/constraints.md | 179 + gcc-4.9/gcc/config/spu/predicates.md | 122 + gcc-4.9/gcc/config/spu/spu-builtins.def | 781 + gcc-4.9/gcc/config/spu/spu-builtins.md | 864 + gcc-4.9/gcc/config/spu/spu-c.c | 235 + gcc-4.9/gcc/config/spu/spu-elf.h | 75 + gcc-4.9/gcc/config/spu/spu-modes.def | 29 + gcc-4.9/gcc/config/spu/spu-protos.h | 93 + gcc-4.9/gcc/config/spu/spu.c | 7349 +++ gcc-4.9/gcc/config/spu/spu.h | 551 + gcc-4.9/gcc/config/spu/spu.md | 5093 ++ gcc-4.9/gcc/config/spu/spu.opt | 105 + gcc-4.9/gcc/config/spu/spu_cache.h | 39 + gcc-4.9/gcc/config/spu/spu_internals.h | 421 + gcc-4.9/gcc/config/spu/spu_intrinsics.h | 83 + gcc-4.9/gcc/config/spu/spu_mfcio.h | 342 + gcc-4.9/gcc/config/spu/t-spu-elf | 34 + gcc-4.9/gcc/config/spu/vec_types.h | 36 + gcc-4.9/gcc/config/spu/vmx2spu.h | 3985 ++ gcc-4.9/gcc/config/stormy16/constraints.md | 119 + gcc-4.9/gcc/config/stormy16/predicates.md | 178 + gcc-4.9/gcc/config/stormy16/stormy-abi | 174 + gcc-4.9/gcc/config/stormy16/stormy16-protos.h | 69 + gcc-4.9/gcc/config/stormy16/stormy16.c | 2703 ++ gcc-4.9/gcc/config/stormy16/stormy16.h | 504 + 
gcc-4.9/gcc/config/stormy16/stormy16.md | 1250 + gcc-4.9/gcc/config/stormy16/stormy16.opt | 24 + gcc-4.9/gcc/config/t-darwin | 36 + gcc-4.9/gcc/config/t-glibc | 21 + gcc-4.9/gcc/config/t-libunwind | 21 + gcc-4.9/gcc/config/t-linux | 21 + gcc-4.9/gcc/config/t-lynx | 24 + gcc-4.9/gcc/config/t-openbsd | 2 + gcc-4.9/gcc/config/t-pnt16-warn | 27 + gcc-4.9/gcc/config/t-rtems | 2 + gcc-4.9/gcc/config/t-slibgcc | 2 + gcc-4.9/gcc/config/t-sol2 | 37 + gcc-4.9/gcc/config/t-sysroot-suffix | 7 + gcc-4.9/gcc/config/t-vxworks | 24 + gcc-4.9/gcc/config/t-winnt | 22 + gcc-4.9/gcc/config/tilegx/constraints.md | 122 + gcc-4.9/gcc/config/tilegx/feedback.h | 14 + gcc-4.9/gcc/config/tilegx/linux.h | 72 + gcc-4.9/gcc/config/tilegx/mul-tables.c | 27243 +++++++++++ gcc-4.9/gcc/config/tilegx/predicates.md | 298 + gcc-4.9/gcc/config/tilegx/sync.md | 227 + gcc-4.9/gcc/config/tilegx/t-tilegx | 21 + gcc-4.9/gcc/config/tilegx/tilegx-builtins.h | 325 + gcc-4.9/gcc/config/tilegx/tilegx-c.c | 57 + gcc-4.9/gcc/config/tilegx/tilegx-generic.md | 115 + gcc-4.9/gcc/config/tilegx/tilegx-modes.def | 37 + gcc-4.9/gcc/config/tilegx/tilegx-multiply.h | 78 + gcc-4.9/gcc/config/tilegx/tilegx-opts.h | 33 + gcc-4.9/gcc/config/tilegx/tilegx-protos.h | 75 + gcc-4.9/gcc/config/tilegx/tilegx.c | 5707 +++ gcc-4.9/gcc/config/tilegx/tilegx.h | 556 + gcc-4.9/gcc/config/tilegx/tilegx.md | 5630 +++ gcc-4.9/gcc/config/tilegx/tilegx.opt | 63 + gcc-4.9/gcc/config/tilepro/constraints.md | 101 + gcc-4.9/gcc/config/tilepro/feedback.h | 14 + gcc-4.9/gcc/config/tilepro/gen-mul-tables.cc | 1361 + gcc-4.9/gcc/config/tilepro/linux.h | 64 + gcc-4.9/gcc/config/tilepro/mul-tables.c | 17831 +++++++ gcc-4.9/gcc/config/tilepro/predicates.md | 260 + gcc-4.9/gcc/config/tilepro/t-tilepro | 15 + gcc-4.9/gcc/config/tilepro/tilepro-builtins.h | 216 + gcc-4.9/gcc/config/tilepro/tilepro-c.c | 56 + gcc-4.9/gcc/config/tilepro/tilepro-generic.md | 107 + gcc-4.9/gcc/config/tilepro/tilepro-modes.def | 34 + gcc-4.9/gcc/config/tilepro/tilepro-multiply.h | 82 + gcc-4.9/gcc/config/tilepro/tilepro-protos.h | 76 + gcc-4.9/gcc/config/tilepro/tilepro.c | 5097 ++ gcc-4.9/gcc/config/tilepro/tilepro.h | 481 + gcc-4.9/gcc/config/tilepro/tilepro.md | 3810 ++ gcc-4.9/gcc/config/tilepro/tilepro.opt | 36 + gcc-4.9/gcc/config/tm-dwarf2.h | 4 + gcc-4.9/gcc/config/usegas.h | 20 + gcc-4.9/gcc/config/usegld.h | 1 + gcc-4.9/gcc/config/v850/constraints.md | 108 + gcc-4.9/gcc/config/v850/predicates.md | 584 + gcc-4.9/gcc/config/v850/rtems.h | 36 + gcc-4.9/gcc/config/v850/t-rtems | 7 + gcc-4.9/gcc/config/v850/t-v850 | 35 + gcc-4.9/gcc/config/v850/v850-c.c | 274 + gcc-4.9/gcc/config/v850/v850-modes.def | 27 + gcc-4.9/gcc/config/v850/v850-opts.h | 33 + gcc-4.9/gcc/config/v850/v850-protos.h | 69 + gcc-4.9/gcc/config/v850/v850.c | 3281 ++ gcc-4.9/gcc/config/v850/v850.h | 979 + gcc-4.9/gcc/config/v850/v850.md | 3115 ++ gcc-4.9/gcc/config/v850/v850.opt | 159 + gcc-4.9/gcc/config/vax/builtins.md | 192 + gcc-4.9/gcc/config/vax/constraints.md | 119 + gcc-4.9/gcc/config/vax/elf.h | 112 + gcc-4.9/gcc/config/vax/elf.opt | 29 + gcc-4.9/gcc/config/vax/linux.h | 51 + gcc-4.9/gcc/config/vax/netbsd-elf.h | 68 + gcc-4.9/gcc/config/vax/openbsd.h | 50 + gcc-4.9/gcc/config/vax/openbsd1.h | 22 + gcc-4.9/gcc/config/vax/predicates.md | 111 + gcc-4.9/gcc/config/vax/vax-modes.def | 22 + gcc-4.9/gcc/config/vax/vax-protos.h | 39 + gcc-4.9/gcc/config/vax/vax.c | 2177 + gcc-4.9/gcc/config/vax/vax.h | 708 + gcc-4.9/gcc/config/vax/vax.md | 1662 + gcc-4.9/gcc/config/vax/vax.opt | 51 + gcc-4.9/gcc/config/vms/make-crtlmap.awk | 55 + 
gcc-4.9/gcc/config/vms/t-vms | 40 + gcc-4.9/gcc/config/vms/t-vmsnative | 34 + gcc-4.9/gcc/config/vms/vms-ar.c | 348 + gcc-4.9/gcc/config/vms/vms-c.c | 488 + gcc-4.9/gcc/config/vms/vms-crtlmap.map | 930 + gcc-4.9/gcc/config/vms/vms-f.c | 31 + gcc-4.9/gcc/config/vms/vms-ld.c | 968 + gcc-4.9/gcc/config/vms/vms-opts.h | 30 + gcc-4.9/gcc/config/vms/vms-protos.h | 35 + gcc-4.9/gcc/config/vms/vms-stdint.h | 50 + gcc-4.9/gcc/config/vms/vms.c | 330 + gcc-4.9/gcc/config/vms/vms.h | 92 + gcc-4.9/gcc/config/vms/vms.opt | 63 + gcc-4.9/gcc/config/vms/x-vms | 26 + gcc-4.9/gcc/config/vms/xm-vms.h | 63 + gcc-4.9/gcc/config/vx-common.h | 94 + gcc-4.9/gcc/config/vxworks-dummy.h | 40 + gcc-4.9/gcc/config/vxworks.c | 154 + gcc-4.9/gcc/config/vxworks.h | 140 + gcc-4.9/gcc/config/vxworks.opt | 46 + gcc-4.9/gcc/config/vxworksae.h | 70 + gcc-4.9/gcc/config/winnt-c.c | 38 + gcc-4.9/gcc/config/x-cflags-O1 | 5 + gcc-4.9/gcc/config/x-darwin | 3 + gcc-4.9/gcc/config/x-hpux | 4 + gcc-4.9/gcc/config/x-linux | 3 + gcc-4.9/gcc/config/x-openbsd | 4 + gcc-4.9/gcc/config/x-solaris | 3 + gcc-4.9/gcc/config/xtensa/constraints.md | 139 + gcc-4.9/gcc/config/xtensa/elf.h | 101 + gcc-4.9/gcc/config/xtensa/elf.opt | 29 + gcc-4.9/gcc/config/xtensa/linux.h | 65 + gcc-4.9/gcc/config/xtensa/predicates.md | 175 + gcc-4.9/gcc/config/xtensa/t-xtensa | 19 + gcc-4.9/gcc/config/xtensa/xtensa-protos.h | 73 + gcc-4.9/gcc/config/xtensa/xtensa.c | 3712 ++ gcc-4.9/gcc/config/xtensa/xtensa.h | 819 + gcc-4.9/gcc/config/xtensa/xtensa.md | 1913 + gcc-4.9/gcc/config/xtensa/xtensa.opt | 43 + 1354 files changed, 1042837 insertions(+) create mode 100644 gcc-4.9/gcc/config/README create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-arches.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-builtins.c create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-cores.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-elf.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-linux.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-modes.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-opts.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-protos.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-simd.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-tune.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.c create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.opt create mode 100644 gcc-4.9/gcc/config/aarch64/arm_neon.h create mode 100644 gcc-4.9/gcc/config/aarch64/atomics.md create mode 100644 gcc-4.9/gcc/config/aarch64/biarchilp32.h create mode 100644 gcc-4.9/gcc/config/aarch64/biarchlp64.h create mode 100644 gcc-4.9/gcc/config/aarch64/constraints.md create mode 100644 gcc-4.9/gcc/config/aarch64/gentune.sh create mode 100644 gcc-4.9/gcc/config/aarch64/iterators.md create mode 100644 gcc-4.9/gcc/config/aarch64/predicates.md create mode 100644 gcc-4.9/gcc/config/aarch64/t-aarch64 create mode 100644 gcc-4.9/gcc/config/aarch64/t-aarch64-linux create mode 100644 gcc-4.9/gcc/config/alpha/alpha-modes.def create mode 100644 gcc-4.9/gcc/config/alpha/alpha-protos.h create mode 100644 gcc-4.9/gcc/config/alpha/alpha.c create mode 100644 gcc-4.9/gcc/config/alpha/alpha.h create mode 100644 
gcc-4.9/gcc/config/alpha/alpha.md create mode 100644 gcc-4.9/gcc/config/alpha/alpha.opt create mode 100644 gcc-4.9/gcc/config/alpha/constraints.md create mode 100644 gcc-4.9/gcc/config/alpha/driver-alpha.c create mode 100644 gcc-4.9/gcc/config/alpha/elf.h create mode 100644 gcc-4.9/gcc/config/alpha/elf.opt create mode 100644 gcc-4.9/gcc/config/alpha/ev4.md create mode 100644 gcc-4.9/gcc/config/alpha/ev5.md create mode 100644 gcc-4.9/gcc/config/alpha/ev6.md create mode 100644 gcc-4.9/gcc/config/alpha/freebsd.h create mode 100644 gcc-4.9/gcc/config/alpha/linux-elf.h create mode 100644 gcc-4.9/gcc/config/alpha/linux.h create mode 100644 gcc-4.9/gcc/config/alpha/netbsd.h create mode 100644 gcc-4.9/gcc/config/alpha/openbsd.h create mode 100644 gcc-4.9/gcc/config/alpha/predicates.md create mode 100644 gcc-4.9/gcc/config/alpha/sync.md create mode 100644 gcc-4.9/gcc/config/alpha/t-linux create mode 100644 gcc-4.9/gcc/config/alpha/t-vms create mode 100644 gcc-4.9/gcc/config/alpha/vms.h create mode 100644 gcc-4.9/gcc/config/alpha/x-alpha create mode 100644 gcc-4.9/gcc/config/arc/arc-modes.def create mode 100644 gcc-4.9/gcc/config/arc/arc-opts.h create mode 100644 gcc-4.9/gcc/config/arc/arc-protos.h create mode 100644 gcc-4.9/gcc/config/arc/arc-simd.h create mode 100644 gcc-4.9/gcc/config/arc/arc.c create mode 100644 gcc-4.9/gcc/config/arc/arc.h create mode 100644 gcc-4.9/gcc/config/arc/arc.md create mode 100644 gcc-4.9/gcc/config/arc/arc.opt create mode 100644 gcc-4.9/gcc/config/arc/arc600.md create mode 100644 gcc-4.9/gcc/config/arc/arc700.md create mode 100644 gcc-4.9/gcc/config/arc/constraints.md create mode 100644 gcc-4.9/gcc/config/arc/fpx.md create mode 100644 gcc-4.9/gcc/config/arc/predicates.md create mode 100644 gcc-4.9/gcc/config/arc/simdext.md create mode 100644 gcc-4.9/gcc/config/arc/t-arc-newlib create mode 100644 gcc-4.9/gcc/config/arc/t-arc-uClibc create mode 100644 gcc-4.9/gcc/config/arm/README-interworking create mode 100644 gcc-4.9/gcc/config/arm/aarch-common-protos.h create mode 100644 gcc-4.9/gcc/config/arm/aarch-common.c create mode 100644 gcc-4.9/gcc/config/arm/aarch-cost-tables.h create mode 100644 gcc-4.9/gcc/config/arm/aout.h create mode 100644 gcc-4.9/gcc/config/arm/arm-arches.def create mode 100644 gcc-4.9/gcc/config/arm/arm-c.c create mode 100644 gcc-4.9/gcc/config/arm/arm-cores.def create mode 100644 gcc-4.9/gcc/config/arm/arm-fixed.md create mode 100644 gcc-4.9/gcc/config/arm/arm-fpus.def create mode 100644 gcc-4.9/gcc/config/arm/arm-generic.md create mode 100644 gcc-4.9/gcc/config/arm/arm-ldmstm.ml create mode 100644 gcc-4.9/gcc/config/arm/arm-modes.def create mode 100644 gcc-4.9/gcc/config/arm/arm-opts.h create mode 100644 gcc-4.9/gcc/config/arm/arm-protos.h create mode 100644 gcc-4.9/gcc/config/arm/arm-tables.opt create mode 100644 gcc-4.9/gcc/config/arm/arm-tune.md create mode 100644 gcc-4.9/gcc/config/arm/arm.c create mode 100644 gcc-4.9/gcc/config/arm/arm.h create mode 100644 gcc-4.9/gcc/config/arm/arm.md create mode 100644 gcc-4.9/gcc/config/arm/arm.opt create mode 100644 gcc-4.9/gcc/config/arm/arm1020e.md create mode 100644 gcc-4.9/gcc/config/arm/arm1026ejs.md create mode 100644 gcc-4.9/gcc/config/arm/arm1136jfs.md create mode 100644 gcc-4.9/gcc/config/arm/arm926ejs.md create mode 100644 gcc-4.9/gcc/config/arm/arm_acle.h create mode 100644 gcc-4.9/gcc/config/arm/arm_neon.h create mode 100644 gcc-4.9/gcc/config/arm/arm_neon_builtins.def create mode 100644 gcc-4.9/gcc/config/arm/bpabi.h create mode 100644 gcc-4.9/gcc/config/arm/coff.h create mode 100644 
gcc-4.9/gcc/config/arm/constraints.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a15-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a15.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a5.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a53.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a7.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a8-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a8.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a9-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a9.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-m4-fpu.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-m4.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-r4.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-r4f.md create mode 100644 gcc-4.9/gcc/config/arm/crypto.def create mode 100644 gcc-4.9/gcc/config/arm/crypto.md create mode 100644 gcc-4.9/gcc/config/arm/driver-arm.c create mode 100644 gcc-4.9/gcc/config/arm/elf.h create mode 100644 gcc-4.9/gcc/config/arm/fa526.md create mode 100644 gcc-4.9/gcc/config/arm/fa606te.md create mode 100644 gcc-4.9/gcc/config/arm/fa626te.md create mode 100644 gcc-4.9/gcc/config/arm/fa726te.md create mode 100644 gcc-4.9/gcc/config/arm/fmp626.md create mode 100755 gcc-4.9/gcc/config/arm/genopt.sh create mode 100755 gcc-4.9/gcc/config/arm/gentune.sh create mode 100644 gcc-4.9/gcc/config/arm/iterators.md create mode 100644 gcc-4.9/gcc/config/arm/iwmmxt.md create mode 100644 gcc-4.9/gcc/config/arm/iwmmxt2.md create mode 100644 gcc-4.9/gcc/config/arm/ldmstm.md create mode 100644 gcc-4.9/gcc/config/arm/ldrdstrd.md create mode 100644 gcc-4.9/gcc/config/arm/linux-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/linux-elf.h create mode 100644 gcc-4.9/gcc/config/arm/linux-gas.h create mode 100644 gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md create mode 100644 gcc-4.9/gcc/config/arm/marvell-pj4.md create mode 100644 gcc-4.9/gcc/config/arm/mmintrin.h create mode 100644 gcc-4.9/gcc/config/arm/neon-docgen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon-gen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon-testgen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon.md create mode 100644 gcc-4.9/gcc/config/arm/neon.ml create mode 100644 gcc-4.9/gcc/config/arm/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/arm/predicates.md create mode 100644 gcc-4.9/gcc/config/arm/rtems-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/semi.h create mode 100644 gcc-4.9/gcc/config/arm/symbian.h create mode 100644 gcc-4.9/gcc/config/arm/sync.md create mode 100644 gcc-4.9/gcc/config/arm/t-aprofile create mode 100644 gcc-4.9/gcc/config/arm/t-arm create mode 100644 gcc-4.9/gcc/config/arm/t-arm-elf create mode 100644 gcc-4.9/gcc/config/arm/t-bpabi create mode 100644 gcc-4.9/gcc/config/arm/t-linux-androideabi create mode 100644 gcc-4.9/gcc/config/arm/t-linux-eabi create mode 100644 gcc-4.9/gcc/config/arm/t-rtems-eabi create mode 100644 gcc-4.9/gcc/config/arm/t-symbian create mode 100644 gcc-4.9/gcc/config/arm/t-vxworks create mode 100644 gcc-4.9/gcc/config/arm/thumb2.md create mode 100644 gcc-4.9/gcc/config/arm/types.md create mode 100644 gcc-4.9/gcc/config/arm/uclinux-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/arm/unknown-elf.h create mode 100644 gcc-4.9/gcc/config/arm/unspecs.md create mode 100644 gcc-4.9/gcc/config/arm/vec-common.md create mode 100644 gcc-4.9/gcc/config/arm/vfp.md create mode 100644 gcc-4.9/gcc/config/arm/vfp11.md create mode 100644 gcc-4.9/gcc/config/arm/vxworks.h 
create mode 100644 gcc-4.9/gcc/config/arm/vxworks.opt create mode 100644 gcc-4.9/gcc/config/arm/x-arm create mode 100644 gcc-4.9/gcc/config/avr/avr-arch.h create mode 100644 gcc-4.9/gcc/config/avr/avr-c.c create mode 100644 gcc-4.9/gcc/config/avr/avr-devices.c create mode 100644 gcc-4.9/gcc/config/avr/avr-dimode.md create mode 100644 gcc-4.9/gcc/config/avr/avr-fixed.md create mode 100644 gcc-4.9/gcc/config/avr/avr-log.c create mode 100644 gcc-4.9/gcc/config/avr/avr-mcus.def create mode 100644 gcc-4.9/gcc/config/avr/avr-modes.def create mode 100644 gcc-4.9/gcc/config/avr/avr-protos.h create mode 100644 gcc-4.9/gcc/config/avr/avr-stdint.h create mode 100644 gcc-4.9/gcc/config/avr/avr-tables.opt create mode 100644 gcc-4.9/gcc/config/avr/avr.c create mode 100644 gcc-4.9/gcc/config/avr/avr.h create mode 100644 gcc-4.9/gcc/config/avr/avr.md create mode 100644 gcc-4.9/gcc/config/avr/avr.opt create mode 100644 gcc-4.9/gcc/config/avr/avrlibc.h create mode 100644 gcc-4.9/gcc/config/avr/builtins.def create mode 100644 gcc-4.9/gcc/config/avr/constraints.md create mode 100644 gcc-4.9/gcc/config/avr/driver-avr.c create mode 100644 gcc-4.9/gcc/config/avr/elf.h create mode 100644 gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c create mode 100644 gcc-4.9/gcc/config/avr/genmultilib.awk create mode 100755 gcc-4.9/gcc/config/avr/genopt.sh create mode 100644 gcc-4.9/gcc/config/avr/predicates.md create mode 100644 gcc-4.9/gcc/config/avr/rtems.h create mode 100644 gcc-4.9/gcc/config/avr/stdfix.h create mode 100644 gcc-4.9/gcc/config/avr/t-avr create mode 100644 gcc-4.9/gcc/config/avr/t-multilib create mode 100644 gcc-4.9/gcc/config/avr/t-rtems create mode 100644 gcc-4.9/gcc/config/bfin/bfin-modes.def create mode 100644 gcc-4.9/gcc/config/bfin/bfin-opts.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin-protos.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin.c create mode 100644 gcc-4.9/gcc/config/bfin/bfin.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin.md create mode 100644 gcc-4.9/gcc/config/bfin/bfin.opt create mode 100644 gcc-4.9/gcc/config/bfin/constraints.md create mode 100644 gcc-4.9/gcc/config/bfin/elf.h create mode 100644 gcc-4.9/gcc/config/bfin/linux.h create mode 100644 gcc-4.9/gcc/config/bfin/predicates.md create mode 100644 gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/bfin/rtems.h create mode 100644 gcc-4.9/gcc/config/bfin/sync.md create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-elf create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-linux create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-uclinux create mode 100644 gcc-4.9/gcc/config/bfin/t-rtems create mode 100644 gcc-4.9/gcc/config/bfin/uclinux.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-isas.def create mode 100644 gcc-4.9/gcc/config/c6x/c6x-modes.def create mode 100644 gcc-4.9/gcc/config/c6x/c6x-mult.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x-mult.md.in create mode 100644 gcc-4.9/gcc/config/c6x/c6x-opts.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-protos.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-sched.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x-sched.md.in create mode 100644 gcc-4.9/gcc/config/c6x/c6x-tables.opt create mode 100644 gcc-4.9/gcc/config/c6x/c6x.c create mode 100644 gcc-4.9/gcc/config/c6x/c6x.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x.opt create mode 100644 gcc-4.9/gcc/config/c6x/c6x_intrinsics.h create mode 100644 gcc-4.9/gcc/config/c6x/constraints.md create mode 100644 gcc-4.9/gcc/config/c6x/elf-common.h create mode 
100644 gcc-4.9/gcc/config/c6x/elf.h create mode 100644 gcc-4.9/gcc/config/c6x/genmult.sh create mode 100644 gcc-4.9/gcc/config/c6x/genopt.sh create mode 100644 gcc-4.9/gcc/config/c6x/gensched.sh create mode 100644 gcc-4.9/gcc/config/c6x/predicates.md create mode 100644 gcc-4.9/gcc/config/c6x/sync.md create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x-elf create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x-uclinux create mode 100644 gcc-4.9/gcc/config/c6x/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/cr16/constraints.md create mode 100644 gcc-4.9/gcc/config/cr16/cr16-protos.h create mode 100644 gcc-4.9/gcc/config/cr16/cr16.c create mode 100644 gcc-4.9/gcc/config/cr16/cr16.h create mode 100644 gcc-4.9/gcc/config/cr16/cr16.md create mode 100644 gcc-4.9/gcc/config/cr16/cr16.opt create mode 100644 gcc-4.9/gcc/config/cr16/predicates.md create mode 100644 gcc-4.9/gcc/config/cr16/t-cr16 create mode 100644 gcc-4.9/gcc/config/cris/constraints.md create mode 100644 gcc-4.9/gcc/config/cris/cris-protos.h create mode 100644 gcc-4.9/gcc/config/cris/cris.c create mode 100644 gcc-4.9/gcc/config/cris/cris.h create mode 100644 gcc-4.9/gcc/config/cris/cris.md create mode 100644 gcc-4.9/gcc/config/cris/cris.opt create mode 100644 gcc-4.9/gcc/config/cris/elf.opt create mode 100644 gcc-4.9/gcc/config/cris/linux.h create mode 100644 gcc-4.9/gcc/config/cris/linux.opt create mode 100644 gcc-4.9/gcc/config/cris/predicates.md create mode 100644 gcc-4.9/gcc/config/cris/sync.md create mode 100644 gcc-4.9/gcc/config/cris/t-cris create mode 100644 gcc-4.9/gcc/config/cris/t-elfmulti create mode 100644 gcc-4.9/gcc/config/cris/t-linux create mode 100644 gcc-4.9/gcc/config/darwin-c.c create mode 100644 gcc-4.9/gcc/config/darwin-driver.c create mode 100644 gcc-4.9/gcc/config/darwin-f.c create mode 100644 gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def create mode 100644 gcc-4.9/gcc/config/darwin-protos.h create mode 100644 gcc-4.9/gcc/config/darwin-sections.def create mode 100644 gcc-4.9/gcc/config/darwin.c create mode 100644 gcc-4.9/gcc/config/darwin.h create mode 100644 gcc-4.9/gcc/config/darwin.opt create mode 100644 gcc-4.9/gcc/config/darwin10.h create mode 100644 gcc-4.9/gcc/config/darwin9.h create mode 100644 gcc-4.9/gcc/config/dbx.h create mode 100644 gcc-4.9/gcc/config/dbxcoff.h create mode 100644 gcc-4.9/gcc/config/dbxelf.h create mode 100644 gcc-4.9/gcc/config/default-c.c create mode 100644 gcc-4.9/gcc/config/elfos.h create mode 100644 gcc-4.9/gcc/config/epiphany/constraints.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-modes.def create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-protos.h create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-sched.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.c create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.h create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.opt create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h create mode 100644 gcc-4.9/gcc/config/epiphany/mode-switch-use.c create mode 100644 gcc-4.9/gcc/config/epiphany/predicates.md create mode 100644 gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c create mode 100644 gcc-4.9/gcc/config/epiphany/t-epiphany create mode 100644 gcc-4.9/gcc/config/flat.h create mode 100644 gcc-4.9/gcc/config/fr30/constraints.md create mode 100644 gcc-4.9/gcc/config/fr30/fr30-protos.h create mode 100644 gcc-4.9/gcc/config/fr30/fr30.c create mode 100644 gcc-4.9/gcc/config/fr30/fr30.h 
create mode 100644 gcc-4.9/gcc/config/fr30/fr30.md create mode 100644 gcc-4.9/gcc/config/fr30/fr30.opt create mode 100644 gcc-4.9/gcc/config/fr30/predicates.md create mode 100644 gcc-4.9/gcc/config/freebsd-nthr.h create mode 100644 gcc-4.9/gcc/config/freebsd-spec.h create mode 100644 gcc-4.9/gcc/config/freebsd-stdint.h create mode 100644 gcc-4.9/gcc/config/freebsd.h create mode 100644 gcc-4.9/gcc/config/freebsd.opt create mode 100644 gcc-4.9/gcc/config/frv/constraints.md create mode 100644 gcc-4.9/gcc/config/frv/frv-asm.h create mode 100644 gcc-4.9/gcc/config/frv/frv-modes.def create mode 100644 gcc-4.9/gcc/config/frv/frv-opts.h create mode 100644 gcc-4.9/gcc/config/frv/frv-protos.h create mode 100644 gcc-4.9/gcc/config/frv/frv.c create mode 100644 gcc-4.9/gcc/config/frv/frv.h create mode 100644 gcc-4.9/gcc/config/frv/frv.md create mode 100644 gcc-4.9/gcc/config/frv/frv.opt create mode 100644 gcc-4.9/gcc/config/frv/linux.h create mode 100644 gcc-4.9/gcc/config/frv/predicates.md create mode 100644 gcc-4.9/gcc/config/frv/t-frv create mode 100644 gcc-4.9/gcc/config/frv/t-linux create mode 100644 gcc-4.9/gcc/config/fused-madd.opt create mode 100644 gcc-4.9/gcc/config/g.opt create mode 100644 gcc-4.9/gcc/config/glibc-c.c create mode 100644 gcc-4.9/gcc/config/glibc-stdint.h create mode 100644 gcc-4.9/gcc/config/gnu-user.h create mode 100644 gcc-4.9/gcc/config/gnu-user.opt create mode 100644 gcc-4.9/gcc/config/gnu.h create mode 100644 gcc-4.9/gcc/config/h8300/constraints.md create mode 100644 gcc-4.9/gcc/config/h8300/elf.h create mode 100644 gcc-4.9/gcc/config/h8300/genmova.sh create mode 100644 gcc-4.9/gcc/config/h8300/h8300-protos.h create mode 100644 gcc-4.9/gcc/config/h8300/h8300.c create mode 100644 gcc-4.9/gcc/config/h8300/h8300.h create mode 100644 gcc-4.9/gcc/config/h8300/h8300.md create mode 100644 gcc-4.9/gcc/config/h8300/h8300.opt create mode 100644 gcc-4.9/gcc/config/h8300/mova.md create mode 100644 gcc-4.9/gcc/config/h8300/predicates.md create mode 100644 gcc-4.9/gcc/config/h8300/rtems.h create mode 100644 gcc-4.9/gcc/config/h8300/t-h8300 create mode 100644 gcc-4.9/gcc/config/h8300/t-rtems create mode 100644 gcc-4.9/gcc/config/host-darwin.c create mode 100644 gcc-4.9/gcc/config/host-darwin.h create mode 100644 gcc-4.9/gcc/config/host-hpux.c create mode 100644 gcc-4.9/gcc/config/host-linux.c create mode 100644 gcc-4.9/gcc/config/host-openbsd.c create mode 100644 gcc-4.9/gcc/config/host-solaris.c create mode 100644 gcc-4.9/gcc/config/hpux-stdint.h create mode 100644 gcc-4.9/gcc/config/hpux11.opt create mode 100644 gcc-4.9/gcc/config/i386/adxintrin.h create mode 100644 gcc-4.9/gcc/config/i386/ammintrin.h create mode 100644 gcc-4.9/gcc/config/i386/athlon.md create mode 100644 gcc-4.9/gcc/config/i386/atom.md create mode 100644 gcc-4.9/gcc/config/i386/att.h create mode 100644 gcc-4.9/gcc/config/i386/avx2intrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512cdintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512erintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512fintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512pfintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avxintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avxmath.h create mode 100644 gcc-4.9/gcc/config/i386/bdver1.md create mode 100644 gcc-4.9/gcc/config/i386/bdver3.md create mode 100644 gcc-4.9/gcc/config/i386/biarch64.h create mode 100644 gcc-4.9/gcc/config/i386/biarchx32.h create mode 100644 gcc-4.9/gcc/config/i386/bmi2intrin.h create mode 100644 gcc-4.9/gcc/config/i386/bmiintrin.h create mode 
100644 gcc-4.9/gcc/config/i386/bmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/bsd.h create mode 100644 gcc-4.9/gcc/config/i386/btver2.md create mode 100644 gcc-4.9/gcc/config/i386/constraints.md create mode 100644 gcc-4.9/gcc/config/i386/core2.md create mode 100644 gcc-4.9/gcc/config/i386/cpuid.h create mode 100644 gcc-4.9/gcc/config/i386/cross-stdarg.h create mode 100644 gcc-4.9/gcc/config/i386/crtdll.h create mode 100644 gcc-4.9/gcc/config/i386/cygming.h create mode 100644 gcc-4.9/gcc/config/i386/cygming.opt create mode 100644 gcc-4.9/gcc/config/i386/cygwin-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/cygwin-w64.h create mode 100644 gcc-4.9/gcc/config/i386/cygwin.h create mode 100644 gcc-4.9/gcc/config/i386/darwin.h create mode 100644 gcc-4.9/gcc/config/i386/darwin64.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp.opt create mode 100644 gcc-4.9/gcc/config/i386/driver-i386.c create mode 100644 gcc-4.9/gcc/config/i386/emmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/f16cintrin.h create mode 100644 gcc-4.9/gcc/config/i386/fma4intrin.h create mode 100644 gcc-4.9/gcc/config/i386/fmaintrin.h create mode 100644 gcc-4.9/gcc/config/i386/freebsd.h create mode 100644 gcc-4.9/gcc/config/i386/freebsd64.h create mode 100644 gcc-4.9/gcc/config/i386/fxsrintrin.h create mode 100644 gcc-4.9/gcc/config/i386/gas.h create mode 100644 gcc-4.9/gcc/config/i386/geode.md create mode 100644 gcc-4.9/gcc/config/i386/gmm_malloc.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user-common.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user64.h create mode 100644 gcc-4.9/gcc/config/i386/gnu.h create mode 100644 gcc-4.9/gcc/config/i386/gstabs.h create mode 100644 gcc-4.9/gcc/config/i386/host-cygwin.c create mode 100644 gcc-4.9/gcc/config/i386/host-i386-darwin.c create mode 100644 gcc-4.9/gcc/config/i386/host-mingw32.c create mode 100644 gcc-4.9/gcc/config/i386/i386-builtin-types.awk create mode 100644 gcc-4.9/gcc/config/i386/i386-builtin-types.def create mode 100644 gcc-4.9/gcc/config/i386/i386-c.c create mode 100644 gcc-4.9/gcc/config/i386/i386-interix.h create mode 100644 gcc-4.9/gcc/config/i386/i386-modes.def create mode 100644 gcc-4.9/gcc/config/i386/i386-opts.h create mode 100644 gcc-4.9/gcc/config/i386/i386-protos.h create mode 100644 gcc-4.9/gcc/config/i386/i386.c create mode 100644 gcc-4.9/gcc/config/i386/i386.h create mode 100644 gcc-4.9/gcc/config/i386/i386.md create mode 100644 gcc-4.9/gcc/config/i386/i386.opt create mode 100644 gcc-4.9/gcc/config/i386/i386elf.h create mode 100644 gcc-4.9/gcc/config/i386/ia32intrin.h create mode 100644 gcc-4.9/gcc/config/i386/immintrin.h create mode 100644 gcc-4.9/gcc/config/i386/interix.opt create mode 100644 gcc-4.9/gcc/config/i386/k6.md create mode 100644 gcc-4.9/gcc/config/i386/kfreebsd-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/kfreebsd-gnu64.h create mode 100644 gcc-4.9/gcc/config/i386/knetbsd-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/kopensolaris-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/linux-common.h create mode 100644 gcc-4.9/gcc/config/i386/linux.h create mode 100644 gcc-4.9/gcc/config/i386/linux64.h create mode 100644 gcc-4.9/gcc/config/i386/lwpintrin.h create mode 100644 gcc-4.9/gcc/config/i386/lynx.h create mode 100644 gcc-4.9/gcc/config/i386/lzcntintrin.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-pthread.h create mode 100644 
gcc-4.9/gcc/config/i386/mingw-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-w64.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-w64.opt create mode 100644 gcc-4.9/gcc/config/i386/mingw.opt create mode 100644 gcc-4.9/gcc/config/i386/mingw32.h create mode 100644 gcc-4.9/gcc/config/i386/mm3dnow.h create mode 100644 gcc-4.9/gcc/config/i386/mmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/mmx.md create mode 100644 gcc-4.9/gcc/config/i386/msformat-c.c create mode 100644 gcc-4.9/gcc/config/i386/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/i386/netbsd64.h create mode 100644 gcc-4.9/gcc/config/i386/nmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/nto.h create mode 100644 gcc-4.9/gcc/config/i386/nto.opt create mode 100644 gcc-4.9/gcc/config/i386/openbsd.h create mode 100644 gcc-4.9/gcc/config/i386/openbsdelf.h create mode 100644 gcc-4.9/gcc/config/i386/pentium.md create mode 100644 gcc-4.9/gcc/config/i386/pmm_malloc.h create mode 100644 gcc-4.9/gcc/config/i386/pmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/popcntintrin.h create mode 100644 gcc-4.9/gcc/config/i386/ppro.md create mode 100644 gcc-4.9/gcc/config/i386/predicates.md create mode 100644 gcc-4.9/gcc/config/i386/prfchwintrin.h create mode 100644 gcc-4.9/gcc/config/i386/rdos.h create mode 100644 gcc-4.9/gcc/config/i386/rdos64.h create mode 100644 gcc-4.9/gcc/config/i386/rdseedintrin.h create mode 100644 gcc-4.9/gcc/config/i386/rtemself.h create mode 100644 gcc-4.9/gcc/config/i386/rtmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/shaintrin.h create mode 100644 gcc-4.9/gcc/config/i386/slm.md create mode 100644 gcc-4.9/gcc/config/i386/smmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/sol2-9.h create mode 100644 gcc-4.9/gcc/config/i386/sol2-bi.h create mode 100644 gcc-4.9/gcc/config/i386/sol2.h create mode 100644 gcc-4.9/gcc/config/i386/sse.md create mode 100644 gcc-4.9/gcc/config/i386/ssemath.h create mode 100644 gcc-4.9/gcc/config/i386/stringop.def create mode 100644 gcc-4.9/gcc/config/i386/stringop.opt create mode 100644 gcc-4.9/gcc/config/i386/subst.md create mode 100644 gcc-4.9/gcc/config/i386/sync.md create mode 100644 gcc-4.9/gcc/config/i386/sysv4.h create mode 100644 gcc-4.9/gcc/config/i386/t-cygming create mode 100644 gcc-4.9/gcc/config/i386/t-cygwin-w64 create mode 100644 gcc-4.9/gcc/config/i386/t-darwin create mode 100644 gcc-4.9/gcc/config/i386/t-darwin64 create mode 100644 gcc-4.9/gcc/config/i386/t-gmm_malloc create mode 100644 gcc-4.9/gcc/config/i386/t-gnu create mode 100644 gcc-4.9/gcc/config/i386/t-i386 create mode 100644 gcc-4.9/gcc/config/i386/t-interix create mode 100644 gcc-4.9/gcc/config/i386/t-kfreebsd create mode 100644 gcc-4.9/gcc/config/i386/t-linux create mode 100644 gcc-4.9/gcc/config/i386/t-linux64 create mode 100644 gcc-4.9/gcc/config/i386/t-mingw-w32 create mode 100644 gcc-4.9/gcc/config/i386/t-mingw-w64 create mode 100644 gcc-4.9/gcc/config/i386/t-openbsd create mode 100644 gcc-4.9/gcc/config/i386/t-pmm_malloc create mode 100644 gcc-4.9/gcc/config/i386/t-rtems create mode 100644 gcc-4.9/gcc/config/i386/t-sol2-64 create mode 100644 gcc-4.9/gcc/config/i386/t-vxworks create mode 100644 gcc-4.9/gcc/config/i386/t-vxworksae create mode 100644 gcc-4.9/gcc/config/i386/tbmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/tmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/unix.h create mode 100644 gcc-4.9/gcc/config/i386/vx-common.h create mode 100644 gcc-4.9/gcc/config/i386/vxworks.h create mode 100644 gcc-4.9/gcc/config/i386/vxworksae.h create mode 100644 
gcc-4.9/gcc/config/i386/winnt-cxx.c create mode 100644 gcc-4.9/gcc/config/i386/winnt-stubs.c create mode 100644 gcc-4.9/gcc/config/i386/winnt.c create mode 100644 gcc-4.9/gcc/config/i386/wmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/x-cygwin create mode 100644 gcc-4.9/gcc/config/i386/x-darwin create mode 100644 gcc-4.9/gcc/config/i386/x-i386 create mode 100644 gcc-4.9/gcc/config/i386/x-mingw32 create mode 100644 gcc-4.9/gcc/config/i386/x86-64.h create mode 100644 gcc-4.9/gcc/config/i386/x86-tune.def create mode 100644 gcc-4.9/gcc/config/i386/x86intrin.h create mode 100644 gcc-4.9/gcc/config/i386/xm-cygwin.h create mode 100644 gcc-4.9/gcc/config/i386/xm-djgpp.h create mode 100644 gcc-4.9/gcc/config/i386/xm-mingw32.h create mode 100644 gcc-4.9/gcc/config/i386/xmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xopintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xsaveintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xsaveoptintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xtestintrin.h create mode 100644 gcc-4.9/gcc/config/ia64/constraints.md create mode 100644 gcc-4.9/gcc/config/ia64/div.md create mode 100644 gcc-4.9/gcc/config/ia64/elf.h create mode 100644 gcc-4.9/gcc/config/ia64/freebsd.h create mode 100644 gcc-4.9/gcc/config/ia64/hpux-unix2003.h create mode 100644 gcc-4.9/gcc/config/ia64/hpux.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64-c.c create mode 100644 gcc-4.9/gcc/config/ia64/ia64-modes.def create mode 100644 gcc-4.9/gcc/config/ia64/ia64-opts.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64-protos.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64.c create mode 100644 gcc-4.9/gcc/config/ia64/ia64.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64.md create mode 100644 gcc-4.9/gcc/config/ia64/ia64.opt create mode 100644 gcc-4.9/gcc/config/ia64/ia64intrin.h create mode 100644 gcc-4.9/gcc/config/ia64/ilp32.opt create mode 100644 gcc-4.9/gcc/config/ia64/itanium2.md create mode 100644 gcc-4.9/gcc/config/ia64/linux.h create mode 100644 gcc-4.9/gcc/config/ia64/predicates.md create mode 100644 gcc-4.9/gcc/config/ia64/sync.md create mode 100644 gcc-4.9/gcc/config/ia64/sysv4.h create mode 100644 gcc-4.9/gcc/config/ia64/t-hpux create mode 100644 gcc-4.9/gcc/config/ia64/t-ia64 create mode 100644 gcc-4.9/gcc/config/ia64/t-linux create mode 100644 gcc-4.9/gcc/config/ia64/vect.md create mode 100644 gcc-4.9/gcc/config/ia64/vms.h create mode 100644 gcc-4.9/gcc/config/ia64/vms.opt create mode 100644 gcc-4.9/gcc/config/initfini-array.h create mode 100644 gcc-4.9/gcc/config/iq2000/abi create mode 100644 gcc-4.9/gcc/config/iq2000/constraints.md create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000-opts.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000-protos.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.c create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.md create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.opt create mode 100644 gcc-4.9/gcc/config/iq2000/predicates.md create mode 100644 gcc-4.9/gcc/config/kfreebsd-gnu.h create mode 100644 gcc-4.9/gcc/config/knetbsd-gnu.h create mode 100644 gcc-4.9/gcc/config/kopensolaris-gnu.h create mode 100644 gcc-4.9/gcc/config/linux-android.h create mode 100644 gcc-4.9/gcc/config/linux-android.opt create mode 100644 gcc-4.9/gcc/config/linux-protos.h create mode 100644 gcc-4.9/gcc/config/linux.c create mode 100644 gcc-4.9/gcc/config/linux.h create mode 100644 gcc-4.9/gcc/config/linux.opt create mode 100644 gcc-4.9/gcc/config/lm32/constraints.md create mode 100644 
gcc-4.9/gcc/config/lm32/lm32-protos.h create mode 100644 gcc-4.9/gcc/config/lm32/lm32.c create mode 100644 gcc-4.9/gcc/config/lm32/lm32.h create mode 100644 gcc-4.9/gcc/config/lm32/lm32.md create mode 100644 gcc-4.9/gcc/config/lm32/lm32.opt create mode 100644 gcc-4.9/gcc/config/lm32/predicates.md create mode 100644 gcc-4.9/gcc/config/lm32/rtems.h create mode 100644 gcc-4.9/gcc/config/lm32/t-lm32 create mode 100644 gcc-4.9/gcc/config/lm32/t-rtems create mode 100644 gcc-4.9/gcc/config/lm32/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/lynx.h create mode 100644 gcc-4.9/gcc/config/lynx.opt create mode 100644 gcc-4.9/gcc/config/m32c/addsub.md create mode 100644 gcc-4.9/gcc/config/m32c/bitops.md create mode 100644 gcc-4.9/gcc/config/m32c/blkmov.md create mode 100644 gcc-4.9/gcc/config/m32c/cond.md create mode 100644 gcc-4.9/gcc/config/m32c/constraints.md create mode 100644 gcc-4.9/gcc/config/m32c/jump.md create mode 100644 gcc-4.9/gcc/config/m32c/m32c-modes.def create mode 100644 gcc-4.9/gcc/config/m32c/m32c-pragma.c create mode 100644 gcc-4.9/gcc/config/m32c/m32c-protos.h create mode 100644 gcc-4.9/gcc/config/m32c/m32c.abi create mode 100644 gcc-4.9/gcc/config/m32c/m32c.c create mode 100644 gcc-4.9/gcc/config/m32c/m32c.h create mode 100644 gcc-4.9/gcc/config/m32c/m32c.md create mode 100644 gcc-4.9/gcc/config/m32c/m32c.opt create mode 100644 gcc-4.9/gcc/config/m32c/minmax.md create mode 100644 gcc-4.9/gcc/config/m32c/mov.md create mode 100644 gcc-4.9/gcc/config/m32c/muldiv.md create mode 100644 gcc-4.9/gcc/config/m32c/predicates.md create mode 100644 gcc-4.9/gcc/config/m32c/prologue.md create mode 100644 gcc-4.9/gcc/config/m32c/rtems.h create mode 100644 gcc-4.9/gcc/config/m32c/shift.md create mode 100644 gcc-4.9/gcc/config/m32c/t-m32c create mode 100644 gcc-4.9/gcc/config/m32r/constraints.md create mode 100644 gcc-4.9/gcc/config/m32r/linux.h create mode 100644 gcc-4.9/gcc/config/m32r/little.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r-opts.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r-protos.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r.c create mode 100644 gcc-4.9/gcc/config/m32r/m32r.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r.md create mode 100644 gcc-4.9/gcc/config/m32r/m32r.opt create mode 100644 gcc-4.9/gcc/config/m32r/predicates.md create mode 100644 gcc-4.9/gcc/config/m32r/rtems.h create mode 100644 gcc-4.9/gcc/config/m32r/t-linux create mode 100644 gcc-4.9/gcc/config/m32r/t-m32r create mode 100644 gcc-4.9/gcc/config/m68k/cf.md create mode 100644 gcc-4.9/gcc/config/m68k/constraints.md create mode 100755 gcc-4.9/gcc/config/m68k/genopt.sh create mode 100644 gcc-4.9/gcc/config/m68k/ieee.opt create mode 100644 gcc-4.9/gcc/config/m68k/linux.h create mode 100644 gcc-4.9/gcc/config/m68k/m68020-elf.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-devices.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-isas.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-microarchs.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-modes.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-none.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-opts.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-protos.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-tables.opt create mode 100644 gcc-4.9/gcc/config/m68k/m68k.c create mode 100644 gcc-4.9/gcc/config/m68k/m68k.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k.md create mode 100644 gcc-4.9/gcc/config/m68k/m68k.opt create mode 100644 gcc-4.9/gcc/config/m68k/m68kelf.h create mode 100644 gcc-4.9/gcc/config/m68k/m68kemb.h create mode 
100644 gcc-4.9/gcc/config/m68k/math-68881.h create mode 100644 gcc-4.9/gcc/config/m68k/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/m68k/openbsd.h create mode 100644 gcc-4.9/gcc/config/m68k/predicates.md create mode 100644 gcc-4.9/gcc/config/m68k/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/m68k/rtemself.h create mode 100644 gcc-4.9/gcc/config/m68k/sync.md create mode 100644 gcc-4.9/gcc/config/m68k/t-cf create mode 100644 gcc-4.9/gcc/config/m68k/t-linux create mode 100644 gcc-4.9/gcc/config/m68k/t-m68k create mode 100644 gcc-4.9/gcc/config/m68k/t-m68kbare create mode 100644 gcc-4.9/gcc/config/m68k/t-mlibs create mode 100644 gcc-4.9/gcc/config/m68k/t-openbsd create mode 100644 gcc-4.9/gcc/config/m68k/t-opts create mode 100644 gcc-4.9/gcc/config/m68k/t-rtems create mode 100644 gcc-4.9/gcc/config/m68k/t-uclinux create mode 100644 gcc-4.9/gcc/config/m68k/uclinux.h create mode 100644 gcc-4.9/gcc/config/m68k/uclinux.opt create mode 100644 gcc-4.9/gcc/config/mcore/constraints.md create mode 100644 gcc-4.9/gcc/config/mcore/mcore-elf.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore-protos.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore.c create mode 100644 gcc-4.9/gcc/config/mcore/mcore.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore.md create mode 100644 gcc-4.9/gcc/config/mcore/mcore.opt create mode 100644 gcc-4.9/gcc/config/mcore/predicates.md create mode 100644 gcc-4.9/gcc/config/mcore/t-mcore create mode 100644 gcc-4.9/gcc/config/mep/constraints.md create mode 100644 gcc-4.9/gcc/config/mep/default.h create mode 100644 gcc-4.9/gcc/config/mep/intrinsics.h create mode 100644 gcc-4.9/gcc/config/mep/intrinsics.md create mode 100644 gcc-4.9/gcc/config/mep/ivc2-template.h create mode 100644 gcc-4.9/gcc/config/mep/mep-c5.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-core.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-default.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-ext-cop.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-intrin.h create mode 100644 gcc-4.9/gcc/config/mep/mep-ivc2.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-pragma.c create mode 100644 gcc-4.9/gcc/config/mep/mep-protos.h create mode 100644 gcc-4.9/gcc/config/mep/mep.c create mode 100644 gcc-4.9/gcc/config/mep/mep.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep.h create mode 100644 gcc-4.9/gcc/config/mep/mep.md create mode 100644 gcc-4.9/gcc/config/mep/mep.opt create mode 100644 gcc-4.9/gcc/config/mep/predicates.md create mode 100644 gcc-4.9/gcc/config/mep/t-mep create mode 100644 gcc-4.9/gcc/config/microblaze/constraints.md create mode 100644 gcc-4.9/gcc/config/microblaze/linux.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze-c.c create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze-protos.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.c create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.md create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.opt create mode 100644 gcc-4.9/gcc/config/microblaze/predicates.md create mode 100644 gcc-4.9/gcc/config/microblaze/rtems.h create mode 100644 gcc-4.9/gcc/config/microblaze/sync.md create mode 100644 gcc-4.9/gcc/config/microblaze/t-microblaze create mode 100644 gcc-4.9/gcc/config/microblaze/t-microblaze-linux create mode 100644 gcc-4.9/gcc/config/microblaze/t-rtems create mode 100644 gcc-4.9/gcc/config/mips/10000.md create mode 100644 gcc-4.9/gcc/config/mips/20kc.md create mode 100644 gcc-4.9/gcc/config/mips/24k.md create mode 
100644 gcc-4.9/gcc/config/mips/3000.md create mode 100644 gcc-4.9/gcc/config/mips/4000.md create mode 100644 gcc-4.9/gcc/config/mips/4100.md create mode 100644 gcc-4.9/gcc/config/mips/4130.md create mode 100644 gcc-4.9/gcc/config/mips/4300.md create mode 100644 gcc-4.9/gcc/config/mips/4600.md create mode 100644 gcc-4.9/gcc/config/mips/4k.md create mode 100644 gcc-4.9/gcc/config/mips/5000.md create mode 100644 gcc-4.9/gcc/config/mips/5400.md create mode 100644 gcc-4.9/gcc/config/mips/5500.md create mode 100644 gcc-4.9/gcc/config/mips/5k.md create mode 100644 gcc-4.9/gcc/config/mips/6000.md create mode 100644 gcc-4.9/gcc/config/mips/7000.md create mode 100644 gcc-4.9/gcc/config/mips/74k.md create mode 100644 gcc-4.9/gcc/config/mips/9000.md create mode 100644 gcc-4.9/gcc/config/mips/constraints.md create mode 100644 gcc-4.9/gcc/config/mips/driver-native.c create mode 100644 gcc-4.9/gcc/config/mips/elf.h create mode 100644 gcc-4.9/gcc/config/mips/elfoabi.h create mode 100644 gcc-4.9/gcc/config/mips/elforion.h create mode 100644 gcc-4.9/gcc/config/mips/generic.md create mode 100755 gcc-4.9/gcc/config/mips/genopt.sh create mode 100644 gcc-4.9/gcc/config/mips/gnu-user.h create mode 100644 gcc-4.9/gcc/config/mips/gnu-user64.h create mode 100644 gcc-4.9/gcc/config/mips/linux-common.h create mode 100644 gcc-4.9/gcc/config/mips/linux.h create mode 100644 gcc-4.9/gcc/config/mips/linux64.h create mode 100644 gcc-4.9/gcc/config/mips/loongson.h create mode 100644 gcc-4.9/gcc/config/mips/loongson.md create mode 100644 gcc-4.9/gcc/config/mips/loongson2ef.md create mode 100644 gcc-4.9/gcc/config/mips/loongson3a.md create mode 100644 gcc-4.9/gcc/config/mips/micromips.md create mode 100644 gcc-4.9/gcc/config/mips/mips-cpus.def create mode 100644 gcc-4.9/gcc/config/mips/mips-dsp.md create mode 100644 gcc-4.9/gcc/config/mips/mips-dspr2.md create mode 100644 gcc-4.9/gcc/config/mips/mips-fixed.md create mode 100644 gcc-4.9/gcc/config/mips/mips-ftypes.def create mode 100644 gcc-4.9/gcc/config/mips/mips-modes.def create mode 100644 gcc-4.9/gcc/config/mips/mips-opts.h create mode 100644 gcc-4.9/gcc/config/mips/mips-protos.h create mode 100644 gcc-4.9/gcc/config/mips/mips-ps-3d.md create mode 100644 gcc-4.9/gcc/config/mips/mips-tables.opt create mode 100644 gcc-4.9/gcc/config/mips/mips.c create mode 100644 gcc-4.9/gcc/config/mips/mips.h create mode 100644 gcc-4.9/gcc/config/mips/mips.md create mode 100644 gcc-4.9/gcc/config/mips/mips.opt create mode 100644 gcc-4.9/gcc/config/mips/mti-elf.h create mode 100644 gcc-4.9/gcc/config/mips/mti-linux.h create mode 100644 gcc-4.9/gcc/config/mips/n32-elf.h create mode 100644 gcc-4.9/gcc/config/mips/netbsd.h create mode 100644 gcc-4.9/gcc/config/mips/octeon.md create mode 100644 gcc-4.9/gcc/config/mips/predicates.md create mode 100644 gcc-4.9/gcc/config/mips/r3900.h create mode 100644 gcc-4.9/gcc/config/mips/rtems.h create mode 100644 gcc-4.9/gcc/config/mips/sb1.md create mode 100644 gcc-4.9/gcc/config/mips/sde.h create mode 100644 gcc-4.9/gcc/config/mips/sde.opt create mode 100644 gcc-4.9/gcc/config/mips/sdemtk.h create mode 100644 gcc-4.9/gcc/config/mips/sr71k.md create mode 100644 gcc-4.9/gcc/config/mips/st.h create mode 100644 gcc-4.9/gcc/config/mips/sync.md create mode 100644 gcc-4.9/gcc/config/mips/t-elf create mode 100644 gcc-4.9/gcc/config/mips/t-irix6 create mode 100644 gcc-4.9/gcc/config/mips/t-isa3264 create mode 100644 gcc-4.9/gcc/config/mips/t-linux64 create mode 100644 gcc-4.9/gcc/config/mips/t-mips create mode 100644 gcc-4.9/gcc/config/mips/t-mti-elf create mode 
100644 gcc-4.9/gcc/config/mips/t-mti-linux create mode 100644 gcc-4.9/gcc/config/mips/t-r3900 create mode 100644 gcc-4.9/gcc/config/mips/t-rtems create mode 100644 gcc-4.9/gcc/config/mips/t-sb1 create mode 100644 gcc-4.9/gcc/config/mips/t-sde create mode 100644 gcc-4.9/gcc/config/mips/t-sdemtk create mode 100644 gcc-4.9/gcc/config/mips/t-sr71k create mode 100644 gcc-4.9/gcc/config/mips/t-st create mode 100644 gcc-4.9/gcc/config/mips/t-vr create mode 100644 gcc-4.9/gcc/config/mips/t-vxworks create mode 100644 gcc-4.9/gcc/config/mips/vr.h create mode 100644 gcc-4.9/gcc/config/mips/vxworks.h create mode 100644 gcc-4.9/gcc/config/mips/x-native create mode 100644 gcc-4.9/gcc/config/mips/xlp.md create mode 100644 gcc-4.9/gcc/config/mips/xlr.md create mode 100644 gcc-4.9/gcc/config/mmix/constraints.md create mode 100644 gcc-4.9/gcc/config/mmix/mmix-modes.def create mode 100644 gcc-4.9/gcc/config/mmix/mmix-protos.h create mode 100644 gcc-4.9/gcc/config/mmix/mmix.c create mode 100644 gcc-4.9/gcc/config/mmix/mmix.h create mode 100644 gcc-4.9/gcc/config/mmix/mmix.md create mode 100644 gcc-4.9/gcc/config/mmix/mmix.opt create mode 100644 gcc-4.9/gcc/config/mmix/predicates.md create mode 100644 gcc-4.9/gcc/config/mmix/t-mmix create mode 100644 gcc-4.9/gcc/config/mn10300/constraints.md create mode 100644 gcc-4.9/gcc/config/mn10300/linux.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-modes.def create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-opts.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-protos.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.c create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.md create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.opt create mode 100644 gcc-4.9/gcc/config/mn10300/predicates.md create mode 100644 gcc-4.9/gcc/config/mn10300/t-mn10300 create mode 100644 gcc-4.9/gcc/config/moxie/constraints.md create mode 100644 gcc-4.9/gcc/config/moxie/moxie-protos.h create mode 100644 gcc-4.9/gcc/config/moxie/moxie.c create mode 100644 gcc-4.9/gcc/config/moxie/moxie.h create mode 100644 gcc-4.9/gcc/config/moxie/moxie.md create mode 100644 gcc-4.9/gcc/config/moxie/moxie.opt create mode 100644 gcc-4.9/gcc/config/moxie/predicates.md create mode 100644 gcc-4.9/gcc/config/moxie/rtems.h create mode 100644 gcc-4.9/gcc/config/moxie/t-moxie create mode 100644 gcc-4.9/gcc/config/moxie/uclinux.h create mode 100644 gcc-4.9/gcc/config/msp430/README.txt create mode 100644 gcc-4.9/gcc/config/msp430/constraints.md create mode 100644 gcc-4.9/gcc/config/msp430/msp430-c.c create mode 100644 gcc-4.9/gcc/config/msp430/msp430-modes.def create mode 100644 gcc-4.9/gcc/config/msp430/msp430-protos.h create mode 100644 gcc-4.9/gcc/config/msp430/msp430.c create mode 100644 gcc-4.9/gcc/config/msp430/msp430.h create mode 100644 gcc-4.9/gcc/config/msp430/msp430.md create mode 100644 gcc-4.9/gcc/config/msp430/msp430.opt create mode 100644 gcc-4.9/gcc/config/msp430/predicates.md create mode 100644 gcc-4.9/gcc/config/msp430/t-msp430 create mode 100644 gcc-4.9/gcc/config/nds32/constants.md create mode 100644 gcc-4.9/gcc/config/nds32/constraints.md create mode 100644 gcc-4.9/gcc/config/nds32/iterators.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-doubleword.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-intrinsic.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-modes.def create mode 100644 gcc-4.9/gcc/config/nds32/nds32-multiple.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-opts.h create mode 100644 
gcc-4.9/gcc/config/nds32/nds32-peephole2.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-protos.h create mode 100644 gcc-4.9/gcc/config/nds32/nds32.c create mode 100644 gcc-4.9/gcc/config/nds32/nds32.h create mode 100644 gcc-4.9/gcc/config/nds32/nds32.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32.opt create mode 100644 gcc-4.9/gcc/config/nds32/nds32_intrinsic.h create mode 100644 gcc-4.9/gcc/config/nds32/pipelines.md create mode 100644 gcc-4.9/gcc/config/nds32/predicates.md create mode 100644 gcc-4.9/gcc/config/nds32/t-mlibs create mode 100644 gcc-4.9/gcc/config/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/netbsd-elf.opt create mode 100644 gcc-4.9/gcc/config/netbsd.h create mode 100644 gcc-4.9/gcc/config/netbsd.opt create mode 100644 gcc-4.9/gcc/config/newlib-stdint.h create mode 100644 gcc-4.9/gcc/config/nios2/constraints.md create mode 100644 gcc-4.9/gcc/config/nios2/elf.h create mode 100644 gcc-4.9/gcc/config/nios2/elf.opt create mode 100644 gcc-4.9/gcc/config/nios2/linux.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2-opts.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2-protos.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2.c create mode 100644 gcc-4.9/gcc/config/nios2/nios2.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2.md create mode 100644 gcc-4.9/gcc/config/nios2/nios2.opt create mode 100644 gcc-4.9/gcc/config/nios2/predicates.md create mode 100644 gcc-4.9/gcc/config/nios2/t-nios2 create mode 100644 gcc-4.9/gcc/config/openbsd-libpthread.h create mode 100644 gcc-4.9/gcc/config/openbsd-oldgas.h create mode 100644 gcc-4.9/gcc/config/openbsd-stdint.h create mode 100644 gcc-4.9/gcc/config/openbsd.h create mode 100644 gcc-4.9/gcc/config/openbsd.opt create mode 100644 gcc-4.9/gcc/config/pa/constraints.md create mode 100644 gcc-4.9/gcc/config/pa/elf.h create mode 100644 gcc-4.9/gcc/config/pa/pa-64.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux10.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux10.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1010.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1010.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux11.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1111.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1111.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1131.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1131.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa-modes.def create mode 100644 gcc-4.9/gcc/config/pa/pa-openbsd.h create mode 100644 gcc-4.9/gcc/config/pa/pa-opts.h create mode 100644 gcc-4.9/gcc/config/pa/pa-protos.h create mode 100644 gcc-4.9/gcc/config/pa/pa.c create mode 100644 gcc-4.9/gcc/config/pa/pa.h create mode 100644 gcc-4.9/gcc/config/pa/pa.md create mode 100644 gcc-4.9/gcc/config/pa/pa.opt create mode 100644 gcc-4.9/gcc/config/pa/pa32-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa32-openbsd.h create mode 100644 gcc-4.9/gcc/config/pa/pa32-regs.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-hpux.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-hpux.opt create mode 100644 gcc-4.9/gcc/config/pa/pa64-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-regs.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-start.h create mode 100644 gcc-4.9/gcc/config/pa/predicates.md create mode 100644 gcc-4.9/gcc/config/pa/som.h create mode 100644 gcc-4.9/gcc/config/pa/t-dce-thr create mode 100644 gcc-4.9/gcc/config/pa/t-linux 
create mode 100644 gcc-4.9/gcc/config/pdp11/constraints.md create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11-modes.def create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11-protos.h create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.c create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.h create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.md create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.opt create mode 100644 gcc-4.9/gcc/config/pdp11/predicates.md create mode 100644 gcc-4.9/gcc/config/pdp11/t-pdp11 create mode 100644 gcc-4.9/gcc/config/picochip/constraints.md create mode 100644 gcc-4.9/gcc/config/picochip/dfa_space.md create mode 100644 gcc-4.9/gcc/config/picochip/dfa_speed.md create mode 100644 gcc-4.9/gcc/config/picochip/picochip-protos.h create mode 100644 gcc-4.9/gcc/config/picochip/picochip.c create mode 100644 gcc-4.9/gcc/config/picochip/picochip.h create mode 100644 gcc-4.9/gcc/config/picochip/picochip.md create mode 100644 gcc-4.9/gcc/config/picochip/picochip.opt create mode 100644 gcc-4.9/gcc/config/picochip/predicates.md create mode 100644 gcc-4.9/gcc/config/picochip/t-picochip create mode 100644 gcc-4.9/gcc/config/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/rl78/constraints.md create mode 100644 gcc-4.9/gcc/config/rl78/predicates.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-c.c create mode 100644 gcc-4.9/gcc/config/rl78/rl78-expand.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-opts.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78-protos.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78-real.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-virt.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78.c create mode 100644 gcc-4.9/gcc/config/rl78/rl78.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78.opt create mode 100644 gcc-4.9/gcc/config/rl78/t-rl78 create mode 100644 gcc-4.9/gcc/config/rpath.opt create mode 100644 gcc-4.9/gcc/config/rs6000/40x.md create mode 100644 gcc-4.9/gcc/config/rs6000/440.md create mode 100644 gcc-4.9/gcc/config/rs6000/476.h create mode 100644 gcc-4.9/gcc/config/rs6000/476.md create mode 100644 gcc-4.9/gcc/config/rs6000/476.opt create mode 100644 gcc-4.9/gcc/config/rs6000/601.md create mode 100644 gcc-4.9/gcc/config/rs6000/603.md create mode 100644 gcc-4.9/gcc/config/rs6000/6xx.md create mode 100644 gcc-4.9/gcc/config/rs6000/7450.md create mode 100644 gcc-4.9/gcc/config/rs6000/750cl.h create mode 100644 gcc-4.9/gcc/config/rs6000/7xx.md create mode 100644 gcc-4.9/gcc/config/rs6000/8540.md create mode 100644 gcc-4.9/gcc/config/rs6000/a2.md create mode 100644 gcc-4.9/gcc/config/rs6000/aix-stdint.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix43.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix51.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix52.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix53.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix61.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix64.opt create mode 100644 gcc-4.9/gcc/config/rs6000/altivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/altivec.md create mode 100644 gcc-4.9/gcc/config/rs6000/biarch64.h create mode 100644 gcc-4.9/gcc/config/rs6000/cell.md create mode 100644 gcc-4.9/gcc/config/rs6000/constraints.md create mode 100644 gcc-4.9/gcc/config/rs6000/crypto.md create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.md create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.opt create mode 100644 
gcc-4.9/gcc/config/rs6000/darwin64.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin7.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin8.h create mode 100644 gcc-4.9/gcc/config/rs6000/default64.h create mode 100644 gcc-4.9/gcc/config/rs6000/dfp.md create mode 100644 gcc-4.9/gcc/config/rs6000/driver-rs6000.c create mode 100644 gcc-4.9/gcc/config/rs6000/e300c2c3.md create mode 100644 gcc-4.9/gcc/config/rs6000/e500.h create mode 100644 gcc-4.9/gcc/config/rs6000/e500mc.md create mode 100644 gcc-4.9/gcc/config/rs6000/e500mc64.md create mode 100644 gcc-4.9/gcc/config/rs6000/e5500.md create mode 100644 gcc-4.9/gcc/config/rs6000/e6500.md create mode 100644 gcc-4.9/gcc/config/rs6000/eabi.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabialtivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabisim.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabispe.h create mode 100644 gcc-4.9/gcc/config/rs6000/freebsd.h create mode 100644 gcc-4.9/gcc/config/rs6000/freebsd64.h create mode 100755 gcc-4.9/gcc/config/rs6000/genopt.sh create mode 100644 gcc-4.9/gcc/config/rs6000/host-darwin.c create mode 100644 gcc-4.9/gcc/config/rs6000/host-ppc64-darwin.c create mode 100644 gcc-4.9/gcc/config/rs6000/htm.md create mode 100644 gcc-4.9/gcc/config/rs6000/htmintrin.h create mode 100644 gcc-4.9/gcc/config/rs6000/htmxlintrin.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux64.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux64.opt create mode 100644 gcc-4.9/gcc/config/rs6000/linuxaltivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/linuxspe.h create mode 100644 gcc-4.9/gcc/config/rs6000/lynx.h create mode 100644 gcc-4.9/gcc/config/rs6000/milli.exp create mode 100644 gcc-4.9/gcc/config/rs6000/mpc.md create mode 100644 gcc-4.9/gcc/config/rs6000/netbsd.h create mode 100644 gcc-4.9/gcc/config/rs6000/option-defaults.h create mode 100644 gcc-4.9/gcc/config/rs6000/paired.h create mode 100644 gcc-4.9/gcc/config/rs6000/paired.md create mode 100644 gcc-4.9/gcc/config/rs6000/power4.md create mode 100644 gcc-4.9/gcc/config/rs6000/power5.md create mode 100644 gcc-4.9/gcc/config/rs6000/power6.md create mode 100644 gcc-4.9/gcc/config/rs6000/power7.md create mode 100644 gcc-4.9/gcc/config/rs6000/power8.md create mode 100644 gcc-4.9/gcc/config/rs6000/ppc-asm.h create mode 100644 gcc-4.9/gcc/config/rs6000/ppu_intrinsics.h create mode 100644 gcc-4.9/gcc/config/rs6000/predicates.md create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-builtin.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-c.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-cpus.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-linux.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-modes.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-opts.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-protos.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-tables.opt create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.md create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.opt create mode 100644 gcc-4.9/gcc/config/rs6000/rs64.md create mode 100644 gcc-4.9/gcc/config/rs6000/rtems.h create mode 100644 gcc-4.9/gcc/config/rs6000/secureplt.h create mode 100644 gcc-4.9/gcc/config/rs6000/si2vmx.h create mode 100644 gcc-4.9/gcc/config/rs6000/singlefp.h create mode 100644 gcc-4.9/gcc/config/rs6000/spe.h create mode 100644 gcc-4.9/gcc/config/rs6000/spe.md create mode 100644 
gcc-4.9/gcc/config/rs6000/spu2vmx.h create mode 100644 gcc-4.9/gcc/config/rs6000/sync.md create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4.h create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4.opt create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4le.h create mode 100644 gcc-4.9/gcc/config/rs6000/t-aix43 create mode 100644 gcc-4.9/gcc/config/rs6000/t-aix52 create mode 100644 gcc-4.9/gcc/config/rs6000/t-darwin64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-darwin8 create mode 100644 gcc-4.9/gcc/config/rs6000/t-fprules create mode 100644 gcc-4.9/gcc/config/rs6000/t-freebsd64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64bele create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64le create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64lebe create mode 100644 gcc-4.9/gcc/config/rs6000/t-lynx create mode 100644 gcc-4.9/gcc/config/rs6000/t-netbsd create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppccomm create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcendian create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcgas create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcos create mode 100644 gcc-4.9/gcc/config/rs6000/t-rs6000 create mode 100644 gcc-4.9/gcc/config/rs6000/t-rtems create mode 100644 gcc-4.9/gcc/config/rs6000/t-spe create mode 100644 gcc-4.9/gcc/config/rs6000/t-vxworks create mode 100644 gcc-4.9/gcc/config/rs6000/t-vxworksae create mode 100644 gcc-4.9/gcc/config/rs6000/t-xilinx create mode 100644 gcc-4.9/gcc/config/rs6000/titan.md create mode 100644 gcc-4.9/gcc/config/rs6000/vec_types.h create mode 100644 gcc-4.9/gcc/config/rs6000/vector.md create mode 100644 gcc-4.9/gcc/config/rs6000/vsx.md create mode 100644 gcc-4.9/gcc/config/rs6000/vxworks.h create mode 100644 gcc-4.9/gcc/config/rs6000/x-aix create mode 100644 gcc-4.9/gcc/config/rs6000/x-darwin create mode 100644 gcc-4.9/gcc/config/rs6000/x-darwin64 create mode 100644 gcc-4.9/gcc/config/rs6000/x-linux-relax create mode 100644 gcc-4.9/gcc/config/rs6000/x-rs6000 create mode 100644 gcc-4.9/gcc/config/rs6000/xcoff.h create mode 100644 gcc-4.9/gcc/config/rs6000/xfpu.h create mode 100644 gcc-4.9/gcc/config/rs6000/xfpu.md create mode 100644 gcc-4.9/gcc/config/rs6000/xilinx.h create mode 100644 gcc-4.9/gcc/config/rs6000/xilinx.opt create mode 100644 gcc-4.9/gcc/config/rtems.h create mode 100644 gcc-4.9/gcc/config/rtems.opt create mode 100644 gcc-4.9/gcc/config/rx/constraints.md create mode 100644 gcc-4.9/gcc/config/rx/predicates.md create mode 100644 gcc-4.9/gcc/config/rx/rx-modes.def create mode 100644 gcc-4.9/gcc/config/rx/rx-opts.h create mode 100644 gcc-4.9/gcc/config/rx/rx-protos.h create mode 100644 gcc-4.9/gcc/config/rx/rx.c create mode 100644 gcc-4.9/gcc/config/rx/rx.h create mode 100644 gcc-4.9/gcc/config/rx/rx.md create mode 100644 gcc-4.9/gcc/config/rx/rx.opt create mode 100644 gcc-4.9/gcc/config/rx/t-rx create mode 100644 gcc-4.9/gcc/config/s390/2064.md create mode 100644 gcc-4.9/gcc/config/s390/2084.md create mode 100644 gcc-4.9/gcc/config/s390/2097.md create mode 100644 gcc-4.9/gcc/config/s390/2817.md create mode 100644 gcc-4.9/gcc/config/s390/2827.md create mode 100644 gcc-4.9/gcc/config/s390/constraints.md create mode 100644 gcc-4.9/gcc/config/s390/htmintrin.h create mode 100644 gcc-4.9/gcc/config/s390/htmxlintrin.h create mode 100644 gcc-4.9/gcc/config/s390/linux.h create mode 100644 gcc-4.9/gcc/config/s390/predicates.md create mode 100644 gcc-4.9/gcc/config/s390/s390-modes.def create mode 100644 
gcc-4.9/gcc/config/s390/s390-opts.h create mode 100644 gcc-4.9/gcc/config/s390/s390-protos.h create mode 100644 gcc-4.9/gcc/config/s390/s390.c create mode 100644 gcc-4.9/gcc/config/s390/s390.h create mode 100644 gcc-4.9/gcc/config/s390/s390.md create mode 100644 gcc-4.9/gcc/config/s390/s390.opt create mode 100644 gcc-4.9/gcc/config/s390/s390intrin.h create mode 100644 gcc-4.9/gcc/config/s390/s390x.h create mode 100644 gcc-4.9/gcc/config/s390/t-linux64 create mode 100644 gcc-4.9/gcc/config/s390/tpf.h create mode 100644 gcc-4.9/gcc/config/s390/tpf.md create mode 100644 gcc-4.9/gcc/config/s390/tpf.opt create mode 100644 gcc-4.9/gcc/config/score/constraints.md create mode 100644 gcc-4.9/gcc/config/score/elf.h create mode 100644 gcc-4.9/gcc/config/score/predicates.md create mode 100644 gcc-4.9/gcc/config/score/score-conv.h create mode 100644 gcc-4.9/gcc/config/score/score-generic.md create mode 100644 gcc-4.9/gcc/config/score/score-modes.def create mode 100644 gcc-4.9/gcc/config/score/score-protos.h create mode 100644 gcc-4.9/gcc/config/score/score.c create mode 100644 gcc-4.9/gcc/config/score/score.h create mode 100644 gcc-4.9/gcc/config/score/score.md create mode 100644 gcc-4.9/gcc/config/score/score.opt create mode 100644 gcc-4.9/gcc/config/sh/constraints.md create mode 100644 gcc-4.9/gcc/config/sh/divcost-analysis create mode 100644 gcc-4.9/gcc/config/sh/divtab-sh4-300.c create mode 100644 gcc-4.9/gcc/config/sh/divtab-sh4.c create mode 100644 gcc-4.9/gcc/config/sh/divtab.c create mode 100644 gcc-4.9/gcc/config/sh/elf.h create mode 100644 gcc-4.9/gcc/config/sh/embed-elf.h create mode 100644 gcc-4.9/gcc/config/sh/iterators.md create mode 100644 gcc-4.9/gcc/config/sh/linux.h create mode 100644 gcc-4.9/gcc/config/sh/little.h create mode 100644 gcc-4.9/gcc/config/sh/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/sh/newlib.h create mode 100644 gcc-4.9/gcc/config/sh/predicates.md create mode 100644 gcc-4.9/gcc/config/sh/rtems.h create mode 100644 gcc-4.9/gcc/config/sh/rtemself.h create mode 100644 gcc-4.9/gcc/config/sh/sh-c.c create mode 100644 gcc-4.9/gcc/config/sh/sh-mem.cc create mode 100644 gcc-4.9/gcc/config/sh/sh-modes.def create mode 100644 gcc-4.9/gcc/config/sh/sh-protos.h create mode 100644 gcc-4.9/gcc/config/sh/sh.c create mode 100644 gcc-4.9/gcc/config/sh/sh.h create mode 100644 gcc-4.9/gcc/config/sh/sh.md create mode 100644 gcc-4.9/gcc/config/sh/sh.opt create mode 100644 gcc-4.9/gcc/config/sh/sh1.md create mode 100644 gcc-4.9/gcc/config/sh/sh4-300.md create mode 100644 gcc-4.9/gcc/config/sh/sh4.md create mode 100644 gcc-4.9/gcc/config/sh/sh4a.md create mode 100644 gcc-4.9/gcc/config/sh/sh64.h create mode 100644 gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc create mode 100644 gcc-4.9/gcc/config/sh/sh_treg_combine.cc create mode 100644 gcc-4.9/gcc/config/sh/shmedia.h create mode 100644 gcc-4.9/gcc/config/sh/shmedia.md create mode 100644 gcc-4.9/gcc/config/sh/sshmedia.h create mode 100644 gcc-4.9/gcc/config/sh/superh.h create mode 100644 gcc-4.9/gcc/config/sh/superh.opt create mode 100644 gcc-4.9/gcc/config/sh/sync.md create mode 100644 gcc-4.9/gcc/config/sh/t-linux create mode 100644 gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 create mode 100644 gcc-4.9/gcc/config/sh/t-rtems create mode 100644 gcc-4.9/gcc/config/sh/t-sh create mode 100644 gcc-4.9/gcc/config/sh/t-sh64 create mode 100644 gcc-4.9/gcc/config/sh/t-vxworks create mode 100644 gcc-4.9/gcc/config/sh/ushmedia.h create mode 100644 gcc-4.9/gcc/config/sh/vxworks.h create mode 100644 gcc-4.9/gcc/config/sol2-10.h create mode 100644 
gcc-4.9/gcc/config/sol2-bi.h create mode 100644 gcc-4.9/gcc/config/sol2-c.c create mode 100644 gcc-4.9/gcc/config/sol2-cxx.c create mode 100644 gcc-4.9/gcc/config/sol2-protos.h create mode 100644 gcc-4.9/gcc/config/sol2-stubs.c create mode 100644 gcc-4.9/gcc/config/sol2.c create mode 100644 gcc-4.9/gcc/config/sol2.h create mode 100644 gcc-4.9/gcc/config/sol2.opt create mode 100644 gcc-4.9/gcc/config/sparc/biarch64.h create mode 100644 gcc-4.9/gcc/config/sparc/constraints.md create mode 100644 gcc-4.9/gcc/config/sparc/cypress.md create mode 100644 gcc-4.9/gcc/config/sparc/default-64.h create mode 100644 gcc-4.9/gcc/config/sparc/driver-sparc.c create mode 100644 gcc-4.9/gcc/config/sparc/freebsd.h create mode 100644 gcc-4.9/gcc/config/sparc/hypersparc.md create mode 100644 gcc-4.9/gcc/config/sparc/leon.md create mode 100644 gcc-4.9/gcc/config/sparc/linux.h create mode 100644 gcc-4.9/gcc/config/sparc/linux64.h create mode 100644 gcc-4.9/gcc/config/sparc/long-double-switch.opt create mode 100644 gcc-4.9/gcc/config/sparc/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/niagara.md create mode 100644 gcc-4.9/gcc/config/sparc/niagara2.md create mode 100644 gcc-4.9/gcc/config/sparc/niagara4.md create mode 100644 gcc-4.9/gcc/config/sparc/openbsd1-64.h create mode 100644 gcc-4.9/gcc/config/sparc/openbsd64.h create mode 100644 gcc-4.9/gcc/config/sparc/predicates.md create mode 100644 gcc-4.9/gcc/config/sparc/rtemself.h create mode 100644 gcc-4.9/gcc/config/sparc/sol2.h create mode 100644 gcc-4.9/gcc/config/sparc/sp-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/sp64-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc-c.c create mode 100644 gcc-4.9/gcc/config/sparc/sparc-modes.def create mode 100644 gcc-4.9/gcc/config/sparc/sparc-opts.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc-protos.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc.c create mode 100644 gcc-4.9/gcc/config/sparc/sparc.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc.md create mode 100644 gcc-4.9/gcc/config/sparc/sparc.opt create mode 100644 gcc-4.9/gcc/config/sparc/sparclet.md create mode 100644 gcc-4.9/gcc/config/sparc/supersparc.md create mode 100644 gcc-4.9/gcc/config/sparc/sync.md create mode 100644 gcc-4.9/gcc/config/sparc/sysv4.h create mode 100644 gcc-4.9/gcc/config/sparc/t-elf create mode 100644 gcc-4.9/gcc/config/sparc/t-leon create mode 100644 gcc-4.9/gcc/config/sparc/t-leon3 create mode 100644 gcc-4.9/gcc/config/sparc/t-linux create mode 100644 gcc-4.9/gcc/config/sparc/t-linux64 create mode 100644 gcc-4.9/gcc/config/sparc/t-netbsd64 create mode 100644 gcc-4.9/gcc/config/sparc/t-rtems create mode 100644 gcc-4.9/gcc/config/sparc/t-rtems-64 create mode 100644 gcc-4.9/gcc/config/sparc/t-sol2-64 create mode 100644 gcc-4.9/gcc/config/sparc/t-sparc create mode 100644 gcc-4.9/gcc/config/sparc/t-vxworks create mode 100644 gcc-4.9/gcc/config/sparc/tso.h create mode 100644 gcc-4.9/gcc/config/sparc/ultra1_2.md create mode 100644 gcc-4.9/gcc/config/sparc/ultra3.md create mode 100644 gcc-4.9/gcc/config/sparc/visintrin.h create mode 100644 gcc-4.9/gcc/config/sparc/vxworks.h create mode 100644 gcc-4.9/gcc/config/sparc/x-sparc create mode 100644 gcc-4.9/gcc/config/spu/constraints.md create mode 100644 gcc-4.9/gcc/config/spu/predicates.md create mode 100644 gcc-4.9/gcc/config/spu/spu-builtins.def create mode 100644 gcc-4.9/gcc/config/spu/spu-builtins.md create mode 100644 gcc-4.9/gcc/config/spu/spu-c.c create mode 100644 gcc-4.9/gcc/config/spu/spu-elf.h create mode 100644 gcc-4.9/gcc/config/spu/spu-modes.def create 
mode 100644 gcc-4.9/gcc/config/spu/spu-protos.h create mode 100644 gcc-4.9/gcc/config/spu/spu.c create mode 100644 gcc-4.9/gcc/config/spu/spu.h create mode 100644 gcc-4.9/gcc/config/spu/spu.md create mode 100644 gcc-4.9/gcc/config/spu/spu.opt create mode 100644 gcc-4.9/gcc/config/spu/spu_cache.h create mode 100644 gcc-4.9/gcc/config/spu/spu_internals.h create mode 100644 gcc-4.9/gcc/config/spu/spu_intrinsics.h create mode 100644 gcc-4.9/gcc/config/spu/spu_mfcio.h create mode 100644 gcc-4.9/gcc/config/spu/t-spu-elf create mode 100644 gcc-4.9/gcc/config/spu/vec_types.h create mode 100644 gcc-4.9/gcc/config/spu/vmx2spu.h create mode 100644 gcc-4.9/gcc/config/stormy16/constraints.md create mode 100644 gcc-4.9/gcc/config/stormy16/predicates.md create mode 100644 gcc-4.9/gcc/config/stormy16/stormy-abi create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16-protos.h create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.c create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.h create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.md create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.opt create mode 100644 gcc-4.9/gcc/config/t-darwin create mode 100644 gcc-4.9/gcc/config/t-glibc create mode 100644 gcc-4.9/gcc/config/t-libunwind create mode 100644 gcc-4.9/gcc/config/t-linux create mode 100644 gcc-4.9/gcc/config/t-lynx create mode 100644 gcc-4.9/gcc/config/t-openbsd create mode 100644 gcc-4.9/gcc/config/t-pnt16-warn create mode 100644 gcc-4.9/gcc/config/t-rtems create mode 100644 gcc-4.9/gcc/config/t-slibgcc create mode 100644 gcc-4.9/gcc/config/t-sol2 create mode 100644 gcc-4.9/gcc/config/t-sysroot-suffix create mode 100644 gcc-4.9/gcc/config/t-vxworks create mode 100644 gcc-4.9/gcc/config/t-winnt create mode 100644 gcc-4.9/gcc/config/tilegx/constraints.md create mode 100644 gcc-4.9/gcc/config/tilegx/feedback.h create mode 100644 gcc-4.9/gcc/config/tilegx/linux.h create mode 100644 gcc-4.9/gcc/config/tilegx/mul-tables.c create mode 100644 gcc-4.9/gcc/config/tilegx/predicates.md create mode 100644 gcc-4.9/gcc/config/tilegx/sync.md create mode 100644 gcc-4.9/gcc/config/tilegx/t-tilegx create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-builtins.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-c.c create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-generic.md create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-modes.def create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-multiply.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-opts.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-protos.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.c create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.md create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.opt create mode 100644 gcc-4.9/gcc/config/tilepro/constraints.md create mode 100644 gcc-4.9/gcc/config/tilepro/feedback.h create mode 100644 gcc-4.9/gcc/config/tilepro/gen-mul-tables.cc create mode 100644 gcc-4.9/gcc/config/tilepro/linux.h create mode 100644 gcc-4.9/gcc/config/tilepro/mul-tables.c create mode 100644 gcc-4.9/gcc/config/tilepro/predicates.md create mode 100644 gcc-4.9/gcc/config/tilepro/t-tilepro create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-builtins.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-c.c create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-generic.md create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-modes.def create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-multiply.h create mode 100644 
gcc-4.9/gcc/config/tilepro/tilepro-protos.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.c create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.md create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.opt create mode 100644 gcc-4.9/gcc/config/tm-dwarf2.h create mode 100644 gcc-4.9/gcc/config/usegas.h create mode 100644 gcc-4.9/gcc/config/usegld.h create mode 100644 gcc-4.9/gcc/config/v850/constraints.md create mode 100644 gcc-4.9/gcc/config/v850/predicates.md create mode 100644 gcc-4.9/gcc/config/v850/rtems.h create mode 100644 gcc-4.9/gcc/config/v850/t-rtems create mode 100644 gcc-4.9/gcc/config/v850/t-v850 create mode 100644 gcc-4.9/gcc/config/v850/v850-c.c create mode 100644 gcc-4.9/gcc/config/v850/v850-modes.def create mode 100644 gcc-4.9/gcc/config/v850/v850-opts.h create mode 100644 gcc-4.9/gcc/config/v850/v850-protos.h create mode 100644 gcc-4.9/gcc/config/v850/v850.c create mode 100644 gcc-4.9/gcc/config/v850/v850.h create mode 100644 gcc-4.9/gcc/config/v850/v850.md create mode 100644 gcc-4.9/gcc/config/v850/v850.opt create mode 100644 gcc-4.9/gcc/config/vax/builtins.md create mode 100644 gcc-4.9/gcc/config/vax/constraints.md create mode 100644 gcc-4.9/gcc/config/vax/elf.h create mode 100644 gcc-4.9/gcc/config/vax/elf.opt create mode 100644 gcc-4.9/gcc/config/vax/linux.h create mode 100644 gcc-4.9/gcc/config/vax/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/vax/openbsd.h create mode 100644 gcc-4.9/gcc/config/vax/openbsd1.h create mode 100644 gcc-4.9/gcc/config/vax/predicates.md create mode 100644 gcc-4.9/gcc/config/vax/vax-modes.def create mode 100644 gcc-4.9/gcc/config/vax/vax-protos.h create mode 100644 gcc-4.9/gcc/config/vax/vax.c create mode 100644 gcc-4.9/gcc/config/vax/vax.h create mode 100644 gcc-4.9/gcc/config/vax/vax.md create mode 100644 gcc-4.9/gcc/config/vax/vax.opt create mode 100644 gcc-4.9/gcc/config/vms/make-crtlmap.awk create mode 100644 gcc-4.9/gcc/config/vms/t-vms create mode 100644 gcc-4.9/gcc/config/vms/t-vmsnative create mode 100644 gcc-4.9/gcc/config/vms/vms-ar.c create mode 100644 gcc-4.9/gcc/config/vms/vms-c.c create mode 100644 gcc-4.9/gcc/config/vms/vms-crtlmap.map create mode 100644 gcc-4.9/gcc/config/vms/vms-f.c create mode 100644 gcc-4.9/gcc/config/vms/vms-ld.c create mode 100644 gcc-4.9/gcc/config/vms/vms-opts.h create mode 100644 gcc-4.9/gcc/config/vms/vms-protos.h create mode 100644 gcc-4.9/gcc/config/vms/vms-stdint.h create mode 100644 gcc-4.9/gcc/config/vms/vms.c create mode 100644 gcc-4.9/gcc/config/vms/vms.h create mode 100644 gcc-4.9/gcc/config/vms/vms.opt create mode 100644 gcc-4.9/gcc/config/vms/x-vms create mode 100644 gcc-4.9/gcc/config/vms/xm-vms.h create mode 100644 gcc-4.9/gcc/config/vx-common.h create mode 100644 gcc-4.9/gcc/config/vxworks-dummy.h create mode 100644 gcc-4.9/gcc/config/vxworks.c create mode 100644 gcc-4.9/gcc/config/vxworks.h create mode 100644 gcc-4.9/gcc/config/vxworks.opt create mode 100644 gcc-4.9/gcc/config/vxworksae.h create mode 100644 gcc-4.9/gcc/config/winnt-c.c create mode 100644 gcc-4.9/gcc/config/x-cflags-O1 create mode 100644 gcc-4.9/gcc/config/x-darwin create mode 100644 gcc-4.9/gcc/config/x-hpux create mode 100644 gcc-4.9/gcc/config/x-linux create mode 100644 gcc-4.9/gcc/config/x-openbsd create mode 100644 gcc-4.9/gcc/config/x-solaris create mode 100644 gcc-4.9/gcc/config/xtensa/constraints.md create mode 100644 gcc-4.9/gcc/config/xtensa/elf.h create mode 100644 gcc-4.9/gcc/config/xtensa/elf.opt create mode 100644 
gcc-4.9/gcc/config/xtensa/linux.h create mode 100644 gcc-4.9/gcc/config/xtensa/predicates.md create mode 100644 gcc-4.9/gcc/config/xtensa/t-xtensa create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa-protos.h create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.c create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.h create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.md create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.opt (limited to 'gcc-4.9/gcc/config') diff --git a/gcc-4.9/gcc/config/README b/gcc-4.9/gcc/config/README new file mode 100644 index 000000000..60328ec5b --- /dev/null +++ b/gcc-4.9/gcc/config/README @@ -0,0 +1,5 @@ +This directory contains machine-specific files for the GNU C compiler. +It has a subdirectory for each basic CPU type. +The only files in this directory itself +are some .h files that pertain to particular operating systems +and are used for more than one CPU type. diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-arches.def b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def new file mode 100644 index 000000000..4b796d8c9 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def @@ -0,0 +1,29 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) + + The NAME is the name of the architecture, represented as a string + constant. The CORE is the identifier for a core representative of + this architecture. ARCH is the architecture revision. FLAGS are + the flags implied by the architecture. */ + +AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c new file mode 100644 index 000000000..55cfe0ab2 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -0,0 +1,1253 @@ +/* Builtins' description for AArch64 SIMD architecture. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" +#include "expr.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "optabs.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "ggc.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimple-iterator.h" + +enum aarch64_simd_builtin_type_mode +{ + T_V8QI, + T_V4HI, + T_V2SI, + T_V2SF, + T_DI, + T_DF, + T_V16QI, + T_V8HI, + T_V4SI, + T_V4SF, + T_V2DI, + T_V2DF, + T_TI, + T_EI, + T_OI, + T_XI, + T_SI, + T_SF, + T_HI, + T_QI, + T_MAX +}; + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define df_UP T_DF +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define v2df_UP T_V2DF +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI +#define xi_UP T_XI +#define si_UP T_SI +#define sf_UP T_SF +#define hi_UP T_HI +#define qi_UP T_QI + +#define UP(X) X##_UP + +#define SIMD_MAX_BUILTIN_ARGS 5 + +enum aarch64_type_qualifiers +{ + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ + /* Used when expanding arguments if an operand could + be an immediate. */ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* Some patterns may have internal operands, this qualifier is an + instruction to the initialisation code to skip this operand. */ + qualifier_internal = 0x40, /* 1 << 6 */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. */ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. 
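+   These map to the __builtin_aarch64_simd_poly* scalar types registered
+   below; note that aarch64_build_scalar_type builds them with
+   make_unsigned_type, so a poly value is carried in an unsigned
+   container of the same width.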
*/ + qualifier_poly = 0x100 +}; + +typedef struct +{ + const char *name; + enum aarch64_simd_builtin_type_mode mode; + const enum insn_code code; + unsigned int fcode; + enum aarch64_type_qualifiers *qualifiers; +} aarch64_simd_builtin_datum; + +static enum aarch64_type_qualifiers +aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none }; +#define TYPES_UNOP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned }; +#define TYPES_UNOPU (aarch64_types_unopu_qualifiers) +#define TYPES_CREATE (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; +#define TYPES_BINOP (aarch64_types_binop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BINOPU (aarch64_types_binopu_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_poly, qualifier_poly }; +#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_TERNOP (aarch64_types_ternop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_none }; +#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_GETLANE (aarch64_types_getlane_qualifiers) +#define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; +#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_SETLANE (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTINSERT (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_COMBINE (aarch64_types_combine_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode }; +#define TYPES_LOAD1 (aarch64_types_load1_qualifiers) +#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_unsigned, + 
qualifier_poly, qualifier_poly }; +#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, + qualifier_none, qualifier_none }; +#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers) + +/* The first argument (return type) of a store should be void type, + which we represent with qualifier_void. Their first operand will be + a DImode pointer to the location to store to, so we must use + qualifier_map_mode | qualifier_pointer to build a pointer to the + element type of the vector. */ +static enum aarch64_type_qualifiers +aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; +#define TYPES_STORE1 (aarch64_types_store1_qualifiers) +#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) + +#define CF0(N, X) CODE_FOR_aarch64_##N##X +#define CF1(N, X) CODE_FOR_##N##X##1 +#define CF2(N, X) CODE_FOR_##N##X##2 +#define CF3(N, X) CODE_FOR_##N##X##3 +#define CF4(N, X) CODE_FOR_##N##X##4 +#define CF10(N, X) CODE_FOR_##N##X + +#define VAR1(T, N, MAP, A) \ + {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, +#define VAR2(T, N, MAP, A, B) \ + VAR1 (T, N, MAP, A) \ + VAR1 (T, N, MAP, B) +#define VAR3(T, N, MAP, A, B, C) \ + VAR2 (T, N, MAP, A, B) \ + VAR1 (T, N, MAP, C) +#define VAR4(T, N, MAP, A, B, C, D) \ + VAR3 (T, N, MAP, A, B, C) \ + VAR1 (T, N, MAP, D) +#define VAR5(T, N, MAP, A, B, C, D, E) \ + VAR4 (T, N, MAP, A, B, C, D) \ + VAR1 (T, N, MAP, E) +#define VAR6(T, N, MAP, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, A, B, C, D, E) \ + VAR1 (T, N, MAP, F) +#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, G) +#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, H) +#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, I) +#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, J) +#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, K) +#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, L) + +/* BUILTIN_ macros should expand to cover the same range of + modes as is given for each define_mode_iterator in + config/aarch64/iterators.md. 
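+
+   As an illustration (the entry name "frintn" here is only a
+   hypothetical example, not necessarily one of the names used in
+   aarch64-simd-builtins.def):
+
+     BUILTIN_VDQF (UNOP, frintn, 2)
+
+   expands through VAR3/VAR2/VAR1 into three table entries,
+
+     {"frintn", T_V2SF, CODE_FOR_frintnv2sf2, 0, TYPES_UNOP},
+     {"frintn", T_V4SF, CODE_FOR_frintnv4sf2, 0, TYPES_UNOP},
+     {"frintn", T_V2DF, CODE_FOR_frintnv2df2, 0, TYPES_UNOP},
+
+   one per mode of the VDQF iterator.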
*/ + +#define BUILTIN_DX(T, N, MAP) \ + VAR2 (T, N, MAP, di, df) +#define BUILTIN_GPF(T, N, MAP) \ + VAR2 (T, N, MAP, sf, df) +#define BUILTIN_SDQ_I(T, N, MAP) \ + VAR4 (T, N, MAP, qi, hi, si, di) +#define BUILTIN_SD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, hi, si) +#define BUILTIN_V2F(T, N, MAP) \ + VAR2 (T, N, MAP, v2sf, v2df) +#define BUILTIN_VALL(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df) +#define BUILTIN_VALLDI(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di) +#define BUILTIN_VALLDIF(T, N, MAP) \ + VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di, df) +#define BUILTIN_VB(T, N, MAP) \ + VAR2 (T, N, MAP, v8qi, v16qi) +#define BUILTIN_VD(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) +#define BUILTIN_VDC(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VDIC(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VDN(T, N, MAP) \ + VAR3 (T, N, MAP, v4hi, v2si, di) +#define BUILTIN_VDQ(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDQF(T, N, MAP) \ + VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) +#define BUILTIN_VDQHS(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQIF(T, N, MAP) \ + VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) +#define BUILTIN_VDQM(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQV(T, N, MAP) \ + VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) +#define BUILTIN_VDQQH(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi) +#define BUILTIN_VDQ_BHSI(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQ_I(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDW(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_BHSI(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v2si) +#define BUILTIN_VD_RE(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VQ(T, N, MAP) \ + VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) +#define BUILTIN_VQN(T, N, MAP) \ + VAR3 (T, N, MAP, v8hi, v4si, v2di) +#define BUILTIN_VQW(T, N, MAP) \ + VAR3 (T, N, MAP, v16qi, v8hi, v4si) +#define BUILTIN_VQ_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v8hi, v4si) +#define BUILTIN_VQ_S(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VSDQ_HSI(T, N, MAP) \ + VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) +#define BUILTIN_VSDQ_I(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) +#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) +#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ + VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) +#define BUILTIN_VSD_HSI(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v2si, hi, si) +#define BUILTIN_VSQN_HSDI(T, N, MAP) \ + VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di) +#define BUILTIN_VSTRUCT(T, N, MAP) \ + VAR3 (T, N, MAP, oi, ci, xi) + +static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { +#include "aarch64-simd-builtins.def" +}; + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + +enum aarch64_builtins +{ + AARCH64_BUILTIN_MIN, + 
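+  /* With VAR1 redefined above, including aarch64-simd-builtins.def here
+     produces one enumerator of the form AARCH64_SIMD_BUILTIN_<T>_<N><A>
+     for every table entry, keeping the enumerators in lockstep with
+     aarch64_simd_builtin_data.  */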
AARCH64_SIMD_BUILTIN_BASE, +#include "aarch64-simd-builtins.def" + AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE + + ARRAY_SIZE (aarch64_simd_builtin_data), + AARCH64_BUILTIN_MAX +}; + +static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; + +#define NUM_DREG_TYPES 6 +#define NUM_QREG_TYPES 6 + +/* Return a tree for a signed or unsigned argument of either + the mode specified by MODE, or the inner mode of MODE. */ +tree +aarch64_build_scalar_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ +#undef INT_TYPES +#define INT_TYPES \ + AARCH64_TYPE_BUILDER (QI) \ + AARCH64_TYPE_BUILDER (HI) \ + AARCH64_TYPE_BUILDER (SI) \ + AARCH64_TYPE_BUILDER (DI) \ + AARCH64_TYPE_BUILDER (EI) \ + AARCH64_TYPE_BUILDER (OI) \ + AARCH64_TYPE_BUILDER (CI) \ + AARCH64_TYPE_BUILDER (XI) \ + AARCH64_TYPE_BUILDER (TI) \ + +/* Statically declare all the possible types we might need. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_p = NULL; \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; + + INT_TYPES + + static tree float_aarch64_type_node = NULL; + static tree double_aarch64_type_node = NULL; + + gcc_assert (!VECTOR_MODE_P (mode)); + +/* If we've already initialised this type, don't initialise it again, + otherwise ask for a new type of the correct size. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return (X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else if (poly_p) \ + return (X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else \ + return (X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = make_signed_type (GET_MODE_PRECISION (mode))); \ + break; + + switch (mode) + { + INT_TYPES + case SFmode: + if (!float_aarch64_type_node) + { + float_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float_aarch64_type_node) = FLOAT_TYPE_SIZE; + layout_type (float_aarch64_type_node); + } + return float_aarch64_type_node; + break; + case DFmode: + if (!double_aarch64_type_node) + { + double_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (double_aarch64_type_node) = DOUBLE_TYPE_SIZE; + layout_type (double_aarch64_type_node); + } + return double_aarch64_type_node; + break; + default: + gcc_unreachable (); + } +} + +tree +aarch64_build_vector_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ + tree eltype; + +#define VECTOR_TYPES \ + AARCH64_TYPE_BUILDER (V16QI) \ + AARCH64_TYPE_BUILDER (V8HI) \ + AARCH64_TYPE_BUILDER (V4SI) \ + AARCH64_TYPE_BUILDER (V2DI) \ + AARCH64_TYPE_BUILDER (V8QI) \ + AARCH64_TYPE_BUILDER (V4HI) \ + AARCH64_TYPE_BUILDER (V2SI) \ + \ + AARCH64_TYPE_BUILDER (V4SF) \ + AARCH64_TYPE_BUILDER (V2DF) \ + AARCH64_TYPE_BUILDER (V2SF) \ +/* Declare our "cache" of values. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; \ + static tree X##_aarch64_type_node_p = NULL; + + VECTOR_TYPES + + gcc_assert (VECTOR_MODE_P (mode)); + +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return X##_aarch64_type_node_u \ + ? 
X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else if (poly_p) \ + return X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else \ + return X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + break; + + switch (mode) + { + default: + eltype = aarch64_build_scalar_type (GET_MODE_INNER (mode), + unsigned_p, poly_p); + return build_vector_type_for_mode (eltype, mode); + break; + VECTOR_TYPES + } +} + +tree +aarch64_build_type (enum machine_mode mode, bool unsigned_p, bool poly_p) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_build_vector_type (mode, unsigned_p, poly_p); + else + return aarch64_build_scalar_type (mode, unsigned_p, poly_p); +} + +tree +aarch64_build_signed_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, false); +} + +tree +aarch64_build_unsigned_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, true, false); +} + +tree +aarch64_build_poly_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, true); +} + +static void +aarch64_init_simd_builtins (void) +{ + unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; + + /* Signed scalar type nodes. */ + tree aarch64_simd_intQI_type_node = aarch64_build_signed_type (QImode); + tree aarch64_simd_intHI_type_node = aarch64_build_signed_type (HImode); + tree aarch64_simd_intSI_type_node = aarch64_build_signed_type (SImode); + tree aarch64_simd_intDI_type_node = aarch64_build_signed_type (DImode); + tree aarch64_simd_intTI_type_node = aarch64_build_signed_type (TImode); + tree aarch64_simd_intEI_type_node = aarch64_build_signed_type (EImode); + tree aarch64_simd_intOI_type_node = aarch64_build_signed_type (OImode); + tree aarch64_simd_intCI_type_node = aarch64_build_signed_type (CImode); + tree aarch64_simd_intXI_type_node = aarch64_build_signed_type (XImode); + + /* Unsigned scalar type nodes. */ + tree aarch64_simd_intUQI_type_node = aarch64_build_unsigned_type (QImode); + tree aarch64_simd_intUHI_type_node = aarch64_build_unsigned_type (HImode); + tree aarch64_simd_intUSI_type_node = aarch64_build_unsigned_type (SImode); + tree aarch64_simd_intUDI_type_node = aarch64_build_unsigned_type (DImode); + + /* Poly scalar type nodes. */ + tree aarch64_simd_polyQI_type_node = aarch64_build_poly_type (QImode); + tree aarch64_simd_polyHI_type_node = aarch64_build_poly_type (HImode); + tree aarch64_simd_polyDI_type_node = aarch64_build_poly_type (DImode); + tree aarch64_simd_polyTI_type_node = aarch64_build_poly_type (TImode); + + /* Float type nodes. */ + tree aarch64_simd_float_type_node = aarch64_build_signed_type (SFmode); + tree aarch64_simd_double_type_node = aarch64_build_signed_type (DFmode); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by aarch64_mangle_type too. 
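+   For example, QImode is registered below as __builtin_aarch64_simd_qi;
+   arm_neon.h builds the user-visible vector types (int8x8_t and friends)
+   from these names via __attribute__ ((vector_size)).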
*/ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intQI_type_node, + "__builtin_aarch64_simd_qi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intHI_type_node, + "__builtin_aarch64_simd_hi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intSI_type_node, + "__builtin_aarch64_simd_si"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_float_type_node, + "__builtin_aarch64_simd_sf"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intDI_type_node, + "__builtin_aarch64_simd_di"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_double_type_node, + "__builtin_aarch64_simd_df"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyQI_type_node, + "__builtin_aarch64_simd_poly8"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node, + "__builtin_aarch64_simd_poly16"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyDI_type_node, + "__builtin_aarch64_simd_poly64"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyTI_type_node, + "__builtin_aarch64_simd_poly128"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node, + "__builtin_aarch64_simd_ti"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node, + "__builtin_aarch64_simd_ei"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intOI_type_node, + "__builtin_aarch64_simd_oi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intCI_type_node, + "__builtin_aarch64_simd_ci"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intXI_type_node, + "__builtin_aarch64_simd_xi"); + + /* Unsigned integer types for various mode sizes. */ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUQI_type_node, + "__builtin_aarch64_simd_uqi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUHI_type_node, + "__builtin_aarch64_simd_uhi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUSI_type_node, + "__builtin_aarch64_simd_usi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUDI_type_node, + "__builtin_aarch64_simd_udi"); + + for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) + { + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; + aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; + const char *const modenames[] = + { + "v8qi", "v4hi", "v2si", "v2sf", "di", "df", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" + }; + const enum machine_mode modes[] = + { + V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, + V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, + V2DFmode, TImode, EImode, OImode, XImode, SImode, + SFmode, HImode, QImode + }; + char namebuf[60]; + tree ftype = NULL; + tree fndecl = NULL; + + gcc_assert (ARRAY_SIZE (modenames) == T_MAX); + + d->fcode = fcode; + + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? 
op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) + { + enum machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; + + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else if (qualifiers & qualifier_poly) + { + type_signature[arg_num] = 'p'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; + + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; + + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. */ + if (qualifiers & qualifier_map_mode) + op_mode = modes[d->mode]; + + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); + + eltype = aarch64_build_type (op_mode, + qualifiers & qualifier_unsigned, + qualifiers & qualifier_poly); + + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); + + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); + + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. */ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + + gcc_assert (ftype != NULL); + + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", + d->name, modenames[d->mode], type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", + d->name, modenames[d->mode]); + + fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, + NULL, NULL_TREE); + aarch64_builtin_decls[fcode] = fndecl; + } +} + +void +aarch64_init_builtins (void) +{ + if (TARGET_SIMD) + aarch64_init_simd_builtins (); +} + +tree +aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= AARCH64_BUILTIN_MAX) + return error_mark_node; + + return aarch64_builtin_decls[code]; +} + +typedef enum +{ + SIMD_ARG_COPY_TO_REG, + SIMD_ARG_CONSTANT, + SIMD_ARG_STOP +} builtin_simd_arg; + +static rtx +aarch64_simd_expand_args (rtx target, int icode, int have_retval, + tree exp, ...) 
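+/* Worker for aarch64_simd_expand_builtin.  The trailing varargs give one
+   builtin_simd_arg code per operand, terminated by SIMD_ARG_STOP; each
+   argument of EXP is expanded and either copied to a register or, for
+   SIMD_ARG_CONSTANT, checked against the insn predicate.  If HAVE_RETVAL
+   is nonzero the result is placed in TARGET, or in a fresh register of
+   the insn's output mode when TARGET is unsuitable.  */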
+{ + va_list ap; + rtx pat; + tree arg[SIMD_MAX_BUILTIN_ARGS]; + rtx op[SIMD_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[SIMD_MAX_BUILTIN_ARGS]; + int argc = 0; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, exp); + + for (;;) + { + builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int); + + if (thisarg == SIMD_ARG_STOP) + break; + else + { + arg[argc] = CALL_EXPR_ARG (exp, argc); + op[argc] = expand_normal (arg[argc]); + mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case SIMD_ARG_COPY_TO_REG: + if (POINTER_TYPE_P (TREE_TYPE (arg[argc]))) + op[argc] = convert_memory_address (Pmode, op[argc]); + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case SIMD_ARG_CONSTANT: + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " + "expected %", argc + 1); + break; + + case SIMD_ARG_STOP: + gcc_unreachable (); + } + + argc++; + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand an AArch64 AdvSIMD builtin(intrinsic). */ +rtx +aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) +{ + aarch64_simd_builtin_datum *d = + &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)]; + enum insn_code icode = d->code; + builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; + int is_void = 0; + int k; + + is_void = !!(d->qualifiers[0] & qualifier_void); + + num_args += is_void; + + for (k = 1; k < num_args; k++) + { + /* We have four arrays of data, each indexed in a different fashion. + qualifiers - element 0 always describes the function return type. + operands - element 0 is either the operand for return value (if + the function has a non-void return type) or the operand for the + first argument. + expr_args - element 0 always holds the first argument. + args - element 0 is always used for the return type. 
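+
+     For a builtin with a non-void return type and two arguments, k runs
+     over 1 and 2: d->qualifiers[1] describes the first argument, insn
+     operand 1 is that argument (operand 0 being the return value), and
+     CALL_EXPR argument 0 supplies its tree; i.e. qualifiers_k = k,
+     operands_k = k - is_void and expr_args_k = k - 1 below.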
*/ + int qualifiers_k = k; + int operands_k = k - is_void; + int expr_args_k = k - 1; + + if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { + rtx arg + = expand_normal (CALL_EXPR_ARG (exp, + (expr_args_k))); + /* Handle constants only if the predicate allows it. */ + bool op_const_int_p = + (CONST_INT_P (arg) + && (*insn_data[icode].operand[operands_k].predicate) + (arg, insn_data[icode].operand[operands_k].mode)); + args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; + } + else + args[k] = SIMD_ARG_COPY_TO_REG; + + } + args[k] = SIMD_ARG_STOP; + + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. */ + return aarch64_simd_expand_args + (target, icode, !is_void, exp, + args[1], + args[2], + args[3], + args[4], + SIMD_ARG_STOP); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient. */ +rtx +aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE) + return aarch64_simd_expand_builtin (fcode, exp, target); + + return NULL_RTX; +} + +tree +aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 +#define AARCH64_FIND_FRINT_VARIANT(N) \ + (AARCH64_CHECK_BUILTIN_MODE (2, D) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ + : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ + : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ + ? 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ + : NULL_TREE))) + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Fmode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_FLOOR: + case BUILT_IN_FLOORF: + return AARCH64_FIND_FRINT_VARIANT (floor); + case BUILT_IN_CEIL: + case BUILT_IN_CEILF: + return AARCH64_FIND_FRINT_VARIANT (ceil); + case BUILT_IN_TRUNC: + case BUILT_IN_TRUNCF: + return AARCH64_FIND_FRINT_VARIANT (btrunc); + case BUILT_IN_ROUND: + case BUILT_IN_ROUNDF: + return AARCH64_FIND_FRINT_VARIANT (round); + case BUILT_IN_NEARBYINT: + case BUILT_IN_NEARBYINTF: + return AARCH64_FIND_FRINT_VARIANT (nearbyint); + case BUILT_IN_SQRT: + case BUILT_IN_SQRTF: + return AARCH64_FIND_FRINT_VARIANT (sqrt); +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_LFLOOR: + case BUILT_IN_LFLOORF: + case BUILT_IN_LLFLOOR: + case BUILT_IN_IFLOORF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LCEIL: + case BUILT_IN_LCEILF: + case BUILT_IN_LLCEIL: + case BUILT_IN_ICEILF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LROUND: + case BUILT_IN_IROUNDF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + + default: + return NULL_TREE; + } + } + + return NULL_TREE; +} + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + case AARCH64_SIMD_BUILTIN_##T##_##N##A: + +tree +aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + int fcode = DECL_FUNCTION_CODE (fndecl); + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + + switch (fcode) + { + BUILTIN_VALLDI (UNOP, abs, 2) + return fold_build1 (ABS_EXPR, type, args[0]); + break; + BUILTIN_VALLDI (BINOP, cmge, 0) + return fold_build2 (GE_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI (BINOP, cmgt, 0) + return fold_build2 (GT_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI 
(BINOP, cmeq, 0) + return fold_build2 (EQ_EXPR, type, args[0], args[1]); + break; + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + { + tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); + tree vec_zero_node = build_zero_cst (type); + return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); + break; + } + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + return fold_build1 (FLOAT_EXPR, type, args[0]); + default: + break; + } + + return NULL_TREE; +} + +bool +aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + tree fndecl; + gimple new_stmt = NULL; + if (call) + { + fndecl = gimple_call_fndecl (stmt); + if (fndecl) + { + int fcode = DECL_FUNCTION_CODE (fndecl); + int nargs = gimple_call_num_args (stmt); + tree *args = (nargs > 0 + ? gimple_call_arg_ptr (stmt, 0) + : &error_mark_node); + + switch (fcode) + { + BUILTIN_VALL (UNOP, reduc_splus_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_PLUS_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MAX_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MIN_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + +#undef AARCH64_CHECK_BUILTIN_MODE +#undef AARCH64_FIND_FRINT_VARIANT +#undef BUILTIN_DX +#undef BUILTIN_SDQ_I +#undef BUILTIN_SD_HSI +#undef BUILTIN_V2F +#undef BUILTIN_VALL +#undef BUILTIN_VB +#undef BUILTIN_VD +#undef BUILTIN_VDC +#undef BUILTIN_VDIC +#undef BUILTIN_VDN +#undef BUILTIN_VDQ +#undef BUILTIN_VDQF +#undef BUILTIN_VDQH +#undef BUILTIN_VDQHS +#undef BUILTIN_VDQIF +#undef BUILTIN_VDQM +#undef BUILTIN_VDQV +#undef BUILTIN_VDQ_BHSI +#undef BUILTIN_VDQ_I +#undef BUILTIN_VDW +#undef BUILTIN_VD_BHSI +#undef BUILTIN_VD_HSI +#undef BUILTIN_VD_RE +#undef BUILTIN_VQ +#undef BUILTIN_VQN +#undef BUILTIN_VQW +#undef BUILTIN_VQ_HSI +#undef BUILTIN_VQ_S +#undef BUILTIN_VSDQ_HSI +#undef BUILTIN_VSDQ_I +#undef BUILTIN_VSDQ_I_BHSI +#undef BUILTIN_VSDQ_I_DI +#undef BUILTIN_VSD_HSI +#undef BUILTIN_VSQN_HSDI +#undef BUILTIN_VSTRUCT +#undef CF0 +#undef CF1 +#undef CF2 +#undef CF3 +#undef CF4 +#undef CF10 +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 +#undef VAR11 + diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-cores.def b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def new file mode 100644 index 000000000..9319249e6 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def @@ -0,0 +1,42 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is a list of cores that implement AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The CORE_IDENT is the name of the core, represented as an identifier. + The SCHEDULER_IDENT is the name of the core for which scheduling decisions + will be made, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. */ + +/* V8 Architecture Processors. */ + +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53) +AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) + +/* V8 big.LITTLE implementations. */ + +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h new file mode 100644 index 000000000..adec7e7ba --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -0,0 +1,33 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Support for bare-metal builds. */ +#ifndef GCC_AARCH64_ELF_RAW_H +#define GCC_AARCH64_ELF_RAW_H + +#define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" +#define ENDFILE_SPEC " crtend%O%s crtn%O%s" + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ + -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" +#endif + +#endif /* GCC_AARCH64_ELF_RAW_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h new file mode 100644 index 000000000..15ab630de --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h @@ -0,0 +1,161 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_ELF_H +#define GCC_AARCH64_ELF_H + + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + aarch64_asm_output_labelref (FILE, NAME) + +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } while (0) + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\"aw\",%init_array" +#define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\"aw\",%fini_array" + +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP CTORS_SECTION_ASM_OP +#define FINI_ARRAY_SECTION_ASM_OP DTORS_SECTION_ASM_OP + +/* Since we use .init_array/.fini_array we don't need the markers at + the start and end of the ctors/dtors arrays. */ +#define CTOR_LIST_BEGIN asm (CTORS_SECTION_ASM_OP) +#define CTOR_LIST_END /* empty */ +#define DTOR_LIST_BEGIN asm (DTORS_SECTION_ASM_OP) +#define DTOR_LIST_END /* empty */ + +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR aarch64_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR aarch64_elf_asm_destructor + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* Support for -falign-* switches. Use .p2align to ensure that code + sections are padded with NOP instructions, rather than zeros. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + do \ + { \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } \ + } while (0) + +#endif /* HAVE_GAS_MAX_SKIP_P2ALIGN */ + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do { \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case HImode: \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + case DImode: /* See comment in aarch64_output_casesi. */ \ + asm_fprintf (STREAM, "\t.word\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf(STREAM, "\t.align\t%d\n", (int)POWER) + +#define ASM_COMMENT_START "//" + +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" + +#define GLOBAL_ASM_OP "\t.global\t" + +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#define ENDIAN_SPEC "-mbig-endian" +#else +#define ENDIAN_SPEC "-mlittle-endian" +#endif + +#if TARGET_DATA_MODEL == 1 +#define ABI_SPEC "-mabi=lp64" +#define MULTILIB_DEFAULTS { "mabi=lp64" } +#elif TARGET_DATA_MODEL == 2 +#define ABI_SPEC "-mabi=ilp32" +#define MULTILIB_DEFAULTS { "mabi=ilp32" } +#else +#error "Unknown or undefined TARGET_DATA_MODEL!" +#endif + +/* Force the default endianness and ABI flags onto the command line + in order to make the other specs easier to write. 
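+
+   For example, with TARGET_DATA_MODEL == 2 and no explicit endianness or
+   ABI on the command line, DRIVER_SELF_SPECS below injects
+   "-mlittle-endian -mabi=ilp32" (or "-mbig-endian" when
+   TARGET_BIG_ENDIAN_DEFAULT is set), so later specs can simply test
+   %{mabi=*} and %{mbig-endian:...}.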
*/ +#undef DRIVER_SELF_SPECS +#define DRIVER_SELF_SPECS \ + " %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \ + " %{!mabi=*:" ABI_SPEC "}" + +#ifdef HAVE_AS_MABI_OPTION +#define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" +#else +#define ASM_MABI_SPEC "%{mabi=lp64:}" +#endif + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{march=*:-march=%*} \ +%(asm_cpu_spec)" \ +ASM_MABI_SPEC +#endif + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION aarch64_elf_asm_named_section + +/* Stabs debug not required. */ +#undef DBX_DEBUGGING_INFO + +#endif /* GCC_AARCH64_ELF_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h new file mode 100644 index 000000000..a8f077156 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -0,0 +1,47 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_LINUX_H +#define GCC_AARCH64_LINUX_H + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + -maarch64linux%{mbig-endian:b}" + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#endif /* GCC_AARCH64_LINUX_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def new file mode 100644 index 000000000..1d2cc7679 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def @@ -0,0 +1,55 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_SWP); +CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS). */ +CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). 
*/ +CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ +CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ +VECTOR_MODES (FLOAT, 8); /* V2SF. */ +VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ + +/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */ +INT_MODE (OI, 32); + +/* Opaque integer modes for 3, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (CI, 48); +INT_MODE (XI, 64); + +/* Vector modes for register lists. */ +VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 32); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 48); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 48); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 64); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 64); /* V8SF V4DF. */ + +/* Quad float: 128-bit floating mode for long doubles. */ +FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def new file mode 100644 index 000000000..1aa65d32a --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def @@ -0,0 +1,38 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is a list of ISA extentsions in AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF) + + EXT_NAME is the name of the extension, represented as a string constant. + FLAGS_ON are the bitwise-or of the features that the extension adds. + FLAGS_OFF are the bitwise-or of the features that the extension removes. */ + +/* V8 Architecture Extensions. + This list currently contains example extensions for CPUs that implement + AArch64, and therefore serves as a template for adding more CPUs in the + future. */ + +AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-opts.h b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h new file mode 100644 index 000000000..370931536 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h @@ -0,0 +1,64 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Definitions for option handling for AArch64. */ + +#ifndef GCC_AARCH64_OPTS_H +#define GCC_AARCH64_OPTS_H + +/* The various cores that implement AArch64. */ +enum aarch64_processor +{ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + INTERNAL_IDENT, +#include "aarch64-cores.def" +#undef AARCH64_CORE + /* Used to indicate that no processor has been specified. */ + generic, + /* Used to mark the end of the processor table. */ + aarch64_none +}; + +/* TLS types. */ +enum aarch64_tls_type { + TLS_TRADITIONAL, + TLS_DESCRIPTORS +}; + +/* The code model defines the address generation strategy. + Most have a PIC and non-PIC variant. */ +enum aarch64_code_model { + /* Static code and data fit within a 1MB region. + Not fully implemented, mostly treated as SMALL. */ + AARCH64_CMODEL_TINY, + /* Static code, data and GOT/PLT fit within a 1MB region. + Not fully implemented, mostly treated as SMALL_PIC. */ + AARCH64_CMODEL_TINY_PIC, + /* Static code and data fit within a 4GB region. + The default non-PIC code model. */ + AARCH64_CMODEL_SMALL, + /* Static code, data and GOT/PLT fit within a 4GB region. + The default PIC code model. */ + AARCH64_CMODEL_SMALL_PIC, + /* No assumptions about addresses of code and data. + The PIC variant is not yet implemented. */ + AARCH64_CMODEL_LARGE +}; + +#endif diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h new file mode 100644 index 000000000..5542f023b --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -0,0 +1,292 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH64_PROTOS_H +#define GCC_AARCH64_PROTOS_H + +/* + SYMBOL_CONTEXT_ADR + The symbol is used in a load-address operation. + SYMBOL_CONTEXT_MEM + The symbol is used as the address in a MEM. + */ +enum aarch64_symbol_context +{ + SYMBOL_CONTEXT_MEM, + SYMBOL_CONTEXT_ADR +}; + +/* SYMBOL_SMALL_ABSOLUTE: Generate symbol accesses through + high and lo relocs that calculate the base address using a PC + relative reloc. + So to get the address of foo, we generate + adrp x0, foo + add x0, x0, :lo12:foo + + To load or store something to foo, we could use the corresponding + load store variants that generate an + ldr x0, [x0,:lo12:foo] + or + str x1, [x0, :lo12:foo] + + This corresponds to the small code model of the compiler. 
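+   (Illustrative aside, not part of the original comment: a C-level
+   access such as
+
+	extern int foo;
+	int get_foo (void) { return foo; }
+
+   is therefore expected to compile, under the small code model, to
+
+	adrp	x0, foo
+	ldr	w0, [x0, :lo12:foo]
+
+   with the :lo12: offset folded into the load as described above.)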
+ + SYMBOL_SMALL_GOT: Similar to the one above but this + gives us the GOT entry of the symbol being referred to : + Thus calculating the GOT entry for foo is done using the + following sequence of instructions. The ADRP instruction + gets us to the page containing the GOT entry of the symbol + and the got_lo12 gets us the actual offset in it. + + adrp x0, :got:foo + ldr x0, [x0, :gotoff_lo12:foo] + + This corresponds to the small PIC model of the compiler. + + SYMBOL_SMALL_TLSGD + SYMBOL_SMALL_TLSDESC + SYMBOL_SMALL_GOTTPREL + SYMBOL_SMALL_TPREL + Each of of these represents a thread-local symbol, and corresponds to the + thread local storage relocation operator for the symbol being referred to. + + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + + SYMBOL_TINY_GOT + + Generate symbol accesses via the GOT using a single PC relative + instruction. To compute the address of symbol foo, we generate: + + ldr t0, :got:foo + + The value of foo can subsequently read using: + + ldrb t0, [t0] + + SYMBOL_FORCE_TO_MEM : Global variables are addressed using + constant pool. All variable addresses are spilled into constant + pools. The constant pools themselves are addressed using PC + relative accesses. This only works for the large code model. + */ +enum aarch64_symbol_type +{ + SYMBOL_SMALL_ABSOLUTE, + SYMBOL_SMALL_GOT, + SYMBOL_SMALL_TLSGD, + SYMBOL_SMALL_TLSDESC, + SYMBOL_SMALL_GOTTPREL, + SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, + SYMBOL_TINY_GOT, + SYMBOL_FORCE_TO_MEM +}; + +/* A set of tuning parameters contains references to size and time + cost models and vectors for address cost calculations, register + move costs and memory move costs. */ + +/* Additional cost for addresses. */ +struct cpu_addrcost_table +{ + const int pre_modify; + const int post_modify; + const int register_offset; + const int register_extend; + const int imm_offset; +}; + +/* Additional costs for register copies. Cost is for one register. */ +struct cpu_regmove_cost +{ + const int GP2GP; + const int GP2FP; + const int FP2GP; + const int FP2FP; +}; + +/* Cost for vector insn classes. */ +struct cpu_vector_cost +{ + const int scalar_stmt_cost; /* Cost of any scalar operation, + excluding load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, + excluding load, store, + vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector + operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch. */ + const int cond_not_taken_branch_cost; /* Cost of not taken branch. 
*/ +}; + +struct tune_params +{ + const struct cpu_cost_table *const insn_extra_cost; + const struct cpu_addrcost_table *const addr_cost; + const struct cpu_regmove_cost *const regmove_cost; + const struct cpu_vector_cost *const vec_costs; + const int memmov_cost; + const int issue_rate; +}; + +HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); +bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +bool aarch64_cannot_change_mode_class (enum machine_mode, + enum machine_mode, + enum reg_class); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); +bool aarch64_constant_address_p (rtx); +bool aarch64_float_const_zero_rtx_p (rtx); +bool aarch64_function_arg_regno_p (unsigned); +bool aarch64_gen_movmemqi (rtx *); +bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); +bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); +bool aarch64_is_long_call_p (rtx); +bool aarch64_label_mentioned_p (rtx); +bool aarch64_legitimate_pic_operand_p (rtx); +bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); +bool aarch64_pad_arg_upward (enum machine_mode, const_tree); +bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); +bool aarch64_regno_ok_for_base_p (int, bool); +bool aarch64_regno_ok_for_index_p (int, bool); +bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); +bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); +bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); +bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); +bool aarch64_symbolic_address_p (rtx); +bool aarch64_uimm12_shift (HOST_WIDE_INT); +const char *aarch64_output_casesi (rtx *); +const char *aarch64_rewrite_selected_cpu (const char *name); + +enum aarch64_symbol_type aarch64_classify_symbol (rtx, + enum aarch64_symbol_context); +enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); +enum reg_class aarch64_regno_regclass (unsigned); +int aarch64_asm_preferred_eh_data_format (int, int); +int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); +int aarch64_hard_regno_nregs (unsigned, enum machine_mode); +int aarch64_simd_attr_length_move (rtx); +int aarch64_uxt_size (int, HOST_WIDE_INT); +rtx aarch64_final_eh_return_addr (void); +rtx aarch64_legitimize_reload_address (rtx *, enum machine_mode, int, int, int); +const char *aarch64_output_move_struct (rtx *operands); +rtx aarch64_return_addr (int, rtx); +rtx aarch64_simd_gen_const_vector_dup (enum machine_mode, int); +bool aarch64_simd_mem_operand_p (rtx); +rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool); +rtx aarch64_tls_get_addr (void); +tree aarch64_fold_builtin (tree, int, tree *, bool); +unsigned aarch64_dbx_register_number (unsigned); +unsigned aarch64_trampoline_size (void); +void aarch64_asm_output_labelref (FILE *, const char *); +void aarch64_elf_asm_named_section (const char *, unsigned, tree); +void aarch64_expand_epilogue (bool); +void aarch64_expand_mov_immediate (rtx, rtx); +void aarch64_expand_prologue (void); +void aarch64_expand_vector_init (rtx, rtx); +void aarch64_function_profiler (FILE *, int); +void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, 
const_tree, rtx, + const_tree, unsigned); +void aarch64_init_expanders (void); +void aarch64_print_operand (FILE *, rtx, char); +void aarch64_print_operand_address (FILE *, rtx); + +/* Initialize builtins for SIMD intrinsics. */ +void init_aarch64_simd_builtins (void); + +void aarch64_simd_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); + +/* Emit code to place a AdvSIMD pair result in memory locations (with equal + registers). */ +void aarch64_simd_emit_pair_result_insn (enum machine_mode, + rtx (*intfn) (rtx, rtx, rtx), rtx, + rtx); + +/* Expand builtins for SIMD intrinsics. */ +rtx aarch64_simd_expand_builtin (int, tree, rtx); + +void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +/* Emit code for reinterprets. */ +void aarch64_simd_reinterpret (rtx, rtx); + +void aarch64_split_128bit_move (rtx, rtx); + +bool aarch64_split_128bit_move_p (rtx, rtx); + +void aarch64_split_simd_combine (rtx, rtx, rtx); + +void aarch64_split_simd_move (rtx, rtx); + +/* Check for a legitimate floating point constant for FMOV. */ +bool aarch64_float_const_representable_p (rtx); + +#if defined (RTX_CODE) + +bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool); +enum machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); +rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); +rtx aarch64_load_tp (rtx); + +void aarch64_expand_compare_and_swap (rtx op[]); +void aarch64_split_compare_and_swap (rtx op[]); +void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); + +#endif /* RTX_CODE */ + +void aarch64_init_builtins (void); +rtx aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED); +tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); + +tree +aarch64_builtin_vectorized_function (tree fndecl, + tree type_out, + tree type_in); + +extern void aarch64_split_combinev16qi (rtx operands[3]); +extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool +aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +#endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def new file mode 100644 index 000000000..c9b7570e5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def @@ -0,0 +1,395 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* In the list below, the BUILTIN_ macros expand to create + builtins for each of the modes described by . When adding + new builtins to this list, a helpful idiom to follow is to add + a line for each pattern in the md file. 
Thus, ADDP, which has one + pattern defined for the VD_BHSI iterator, and one for DImode, has two + entries below. + + Parameter 1 is the 'type' of the intrinsic. This is used to + describe the type modifiers (for example; unsigned) applied to + each of the parameters to the intrinsic function. + + Parameter 2 is the name of the intrinsic. This is appended + to `__builtin_aarch64_` to give the intrinsic name + as exported to the front-ends. + + Parameter 3 describes how to map from the name to the CODE_FOR_ + macro holding the RTL pattern for the intrinsic. This mapping is: + 0 - CODE_FOR_aarch64_ + 1-9 - CODE_FOR_<1-9> + 10 - CODE_FOR_. */ + + BUILTIN_VD_RE (CREATE, create, 0) + BUILTIN_VDC (COMBINE, combine, 0) + BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VDQF (UNOP, sqrt, 2) + BUILTIN_VD_BHSI (BINOP, addp, 0) + VAR1 (UNOP, addp, 0, di) + BUILTIN_VDQ_BHSI (UNOP, clz, 2) + + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) + + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) + BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) + BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) + BUILTIN_VDC (REINTERP, reinterpretv2si, 0) + BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) + BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) + BUILTIN_VQ (REINTERP, reinterpretv4si, 0) + BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv2di, 0) + BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + + BUILTIN_VDQ_I (BINOP, dup_lane, 0) + /* Implemented by aarch64_qshl. */ + BUILTIN_VSDQ_I (BINOP, sqshl, 0) + BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) + BUILTIN_VSDQ_I (BINOP, uqrshl, 0) + /* Implemented by aarch64_. */ + BUILTIN_VSDQ_I (BINOP, sqadd, 0) + BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) + BUILTIN_VSDQ_I (BINOP, uqsub, 0) + /* Implemented by aarch64_qadd. */ + BUILTIN_VSDQ_I (BINOP, suqadd, 0) + BUILTIN_VSDQ_I (BINOP, usqadd, 0) + + /* Implemented by aarch64_get_dreg. */ + BUILTIN_VDC (GETLANE, get_dregoi, 0) + BUILTIN_VDC (GETLANE, get_dregci, 0) + BUILTIN_VDC (GETLANE, get_dregxi, 0) + /* Implemented by aarch64_get_qreg. */ + BUILTIN_VQ (GETLANE, get_qregoi, 0) + BUILTIN_VQ (GETLANE, get_qregci, 0) + BUILTIN_VQ (GETLANE, get_qregxi, 0) + /* Implemented by aarch64_set_qreg. */ + BUILTIN_VQ (SETLANE, set_qregoi, 0) + BUILTIN_VQ (SETLANE, set_qregci, 0) + BUILTIN_VQ (SETLANE, set_qregxi, 0) + /* Implemented by aarch64_ld. */ + BUILTIN_VDC (LOADSTRUCT, ld2, 0) + BUILTIN_VDC (LOADSTRUCT, ld3, 0) + BUILTIN_VDC (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_ld. */ + BUILTIN_VQ (LOADSTRUCT, ld2, 0) + BUILTIN_VQ (LOADSTRUCT, ld3, 0) + BUILTIN_VQ (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) + BUILTIN_VDC (STORESTRUCT, st4, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VQ (STORESTRUCT, st2, 0) + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) + BUILTIN_VQW (BINOP, usubl2, 0) + BUILTIN_VQW (BINOP, saddw2, 0) + BUILTIN_VQW (BINOP, uaddw2, 0) + BUILTIN_VQW (BINOP, ssubw2, 0) + BUILTIN_VQW (BINOP, usubw2, 0) + /* Implemented by aarch64_l. */ + BUILTIN_VDW (BINOP, saddl, 0) + BUILTIN_VDW (BINOP, uaddl, 0) + BUILTIN_VDW (BINOP, ssubl, 0) + BUILTIN_VDW (BINOP, usubl, 0) + /* Implemented by aarch64_w. 
*/ + BUILTIN_VDW (BINOP, saddw, 0) + BUILTIN_VDW (BINOP, uaddw, 0) + BUILTIN_VDW (BINOP, ssubw, 0) + BUILTIN_VDW (BINOP, usubw, 0) + /* Implemented by aarch64_h. */ + BUILTIN_VQ_S (BINOP, shadd, 0) + BUILTIN_VQ_S (BINOP, uhadd, 0) + BUILTIN_VQ_S (BINOP, srhadd, 0) + BUILTIN_VQ_S (BINOP, urhadd, 0) + /* Implemented by aarch64_hn. */ + BUILTIN_VQN (BINOP, addhn, 0) + BUILTIN_VQN (BINOP, raddhn, 0) + /* Implemented by aarch64_hn2. */ + BUILTIN_VQN (TERNOP, addhn2, 0) + BUILTIN_VQN (TERNOP, raddhn2, 0) + + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) + /* Implemented by aarch64_qmovn. */ + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) + /* Implemented by aarch64_s. */ + BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) + BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) + + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) + /* Implemented by aarch64_sqdmll. */ + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) + /* Implemented by aarch64_sqdmll_n. */ + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) + + BUILTIN_VSD_HSI (BINOP, sqdmull, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) + BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) + /* Implemented by aarch64_sqdmulh. */ + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) + /* Implemented by aarch64_sqdmulh_lane. */ + BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) + BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) + + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) + /* Implemented by aarch64_shl. */ + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) + + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) + VAR1 (SHIFTIMM, ashr_simd, 0, di) + BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) + VAR1 (USHIFTIMM, lshr_simd, 0, di) + /* Implemented by aarch64_shr_n. */ + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) + /* Implemented by aarch64_sra_n. */ + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) + /* Implemented by aarch64_shll_n. */ + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) + BUILTIN_VDW (SHIFTIMM, ushll_n, 0) + /* Implemented by aarch64_shll2_n. */ + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) + /* Implemented by aarch64_qshrn_n. 
*/ + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) + /* Implemented by aarch64_si_n. */ + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) + /* Implemented by aarch64_qshl_n. */ + BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) + + /* Implemented by aarch64_cm. */ + BUILTIN_VALLDI (BINOP, cmeq, 0) + BUILTIN_VALLDI (BINOP, cmge, 0) + BUILTIN_VALLDI (BINOP, cmgt, 0) + BUILTIN_VALLDI (BINOP, cmle, 0) + BUILTIN_VALLDI (BINOP, cmlt, 0) + /* Implemented by aarch64_cm. */ + BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + + /* Implemented by reduc_plus_. */ + BUILTIN_VALL (UNOP, reduc_splus_, 10) + BUILTIN_VDQ (UNOP, reduc_uplus_, 10) + + /* Implemented by reduc__. */ + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10) + BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10) + BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10) + + /* Implemented by 3. + smax variants map to fmaxnm, + smax_nan variants map to fmax. */ + BUILTIN_VDQIF (BINOP, smax, 3) + BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, umax, 3) + BUILTIN_VDQ_BHSI (BINOP, umin, 3) + BUILTIN_VDQF (BINOP, smax_nan, 3) + BUILTIN_VDQF (BINOP, smin_nan, 3) + + /* Implemented by 2. */ + BUILTIN_VDQF (UNOP, btrunc, 2) + BUILTIN_VDQF (UNOP, ceil, 2) + BUILTIN_VDQF (UNOP, floor, 2) + BUILTIN_VDQF (UNOP, nearbyint, 2) + BUILTIN_VDQF (UNOP, rint, 2) + BUILTIN_VDQF (UNOP, round, 2) + BUILTIN_VDQF (UNOP, frintn, 2) + + /* Implemented by l2. */ + VAR1 (UNOP, lbtruncv2sf, 2, v2si) + VAR1 (UNOP, lbtruncv4sf, 2, v4si) + VAR1 (UNOP, lbtruncv2df, 2, v2di) + + VAR1 (UNOP, lbtruncuv2sf, 2, v2si) + VAR1 (UNOP, lbtruncuv4sf, 2, v4si) + VAR1 (UNOP, lbtruncuv2df, 2, v2di) + + VAR1 (UNOP, lroundv2sf, 2, v2si) + VAR1 (UNOP, lroundv4sf, 2, v4si) + VAR1 (UNOP, lroundv2df, 2, v2di) + /* Implemented by l2. */ + VAR1 (UNOP, lroundsf, 2, si) + VAR1 (UNOP, lrounddf, 2, di) + + VAR1 (UNOP, lrounduv2sf, 2, v2si) + VAR1 (UNOP, lrounduv4sf, 2, v4si) + VAR1 (UNOP, lrounduv2df, 2, v2di) + VAR1 (UNOP, lroundusf, 2, si) + VAR1 (UNOP, lroundudf, 2, di) + + VAR1 (UNOP, lceilv2sf, 2, v2si) + VAR1 (UNOP, lceilv4sf, 2, v4si) + VAR1 (UNOP, lceilv2df, 2, v2di) + + VAR1 (UNOP, lceiluv2sf, 2, v2si) + VAR1 (UNOP, lceiluv4sf, 2, v4si) + VAR1 (UNOP, lceiluv2df, 2, v2di) + VAR1 (UNOP, lceilusf, 2, si) + VAR1 (UNOP, lceiludf, 2, di) + + VAR1 (UNOP, lfloorv2sf, 2, v2si) + VAR1 (UNOP, lfloorv4sf, 2, v4si) + VAR1 (UNOP, lfloorv2df, 2, v2di) + + VAR1 (UNOP, lflooruv2sf, 2, v2si) + VAR1 (UNOP, lflooruv4sf, 2, v4si) + VAR1 (UNOP, lflooruv2df, 2, v2di) + VAR1 (UNOP, lfloorusf, 2, si) + VAR1 (UNOP, lfloorudf, 2, di) + + VAR1 (UNOP, lfrintnv2sf, 2, v2si) + VAR1 (UNOP, lfrintnv4sf, 2, v4si) + VAR1 (UNOP, lfrintnv2df, 2, v2di) + VAR1 (UNOP, lfrintnsf, 2, si) + VAR1 (UNOP, lfrintndf, 2, di) + + VAR1 (UNOP, lfrintnuv2sf, 2, v2si) + VAR1 (UNOP, lfrintnuv4sf, 2, v4si) + VAR1 (UNOP, lfrintnuv2df, 2, v2di) + VAR1 (UNOP, lfrintnusf, 2, si) + VAR1 (UNOP, lfrintnudf, 2, di) + + /* Implemented by 2. 
*/ + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + + VAR1 (UNOP, floatunsv2si, 2, v2sf) + VAR1 (UNOP, floatunsv4si, 2, v4sf) + VAR1 (UNOP, floatunsv2di, 2, v2df) + + /* Implemented by + aarch64_. */ + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) + BUILTIN_VALL (BINOP, uzp2, 0) + BUILTIN_VALL (BINOP, trn1, 0) + BUILTIN_VALL (BINOP, trn2, 0) + + /* Implemented by + aarch64_frecp. */ + BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF (BINOP, frecps, 0) + BUILTIN_GPF (UNOP, frecpx, 0) + + BUILTIN_VDQF (UNOP, frecpe, 0) + BUILTIN_VDQF (BINOP, frecps, 0) + + BUILTIN_VALLDI (UNOP, abs, 2) + + VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) + VAR1 (BINOP, float_truncate_hi_, 0, v4sf) + + VAR1 (UNOP, float_extend_lo_, 0, v2df) + VAR1 (UNOP, float_truncate_lo_, 0, v2sf) + + /* Implemented by aarch64_ld1. */ + BUILTIN_VALL (LOAD1, ld1, 0) + + /* Implemented by aarch64_st1. */ + BUILTIN_VALL (STORE1, st1, 0) + + /* Implemented by fma4. */ + BUILTIN_VDQF (TERNOP, fma, 4) + + /* Implemented by aarch64_simd_bsl. */ + BUILTIN_VDQQH (BSL_P, simd_bsl, 0) + BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0) + BUILTIN_VALLDIF (BSL_S, simd_bsl, 0) + + /* Implemented by aarch64_crypto_aes. */ + VAR1 (BINOPU, crypto_aese, 0, v16qi) + VAR1 (BINOPU, crypto_aesd, 0, v16qi) + VAR1 (UNOPU, crypto_aesmc, 0, v16qi) + VAR1 (UNOPU, crypto_aesimc, 0, v16qi) + + /* Implemented by aarch64_crypto_sha1. */ + VAR1 (UNOPU, crypto_sha1h, 0, si) + VAR1 (BINOPU, crypto_sha1su1, 0, v4si) + VAR1 (TERNOPU, crypto_sha1c, 0, v4si) + VAR1 (TERNOPU, crypto_sha1m, 0, v4si) + VAR1 (TERNOPU, crypto_sha1p, 0, v4si) + VAR1 (TERNOPU, crypto_sha1su0, 0, v4si) + + /* Implemented by aarch64_crypto_sha256. */ + VAR1 (TERNOPU, crypto_sha256h, 0, v4si) + VAR1 (TERNOPU, crypto_sha256h2, 0, v4si) + VAR1 (BINOPU, crypto_sha256su0, 0, v4si) + VAR1 (TERNOPU, crypto_sha256su1, 0, v4si) + + /* Implemented by aarch64_crypto_pmull. */ + VAR1 (BINOPP, crypto_pmull, 0, di) + VAR1 (BINOPP, crypto_pmull, 0, v2di) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md new file mode 100644 index 000000000..6048d605c --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -0,0 +1,4363 @@ +;; Machine description for AArch64 AdvSIMD architecture. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
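+;; (Illustrative note, not part of the original file: the move expanders
+;; below guard against memory-to-memory moves, which AdvSIMD cannot do
+;; directly.  Copying one in-memory vector object to another is instead
+;; expected to go through a register, roughly
+;;
+;;	ldr	q0, [x1]
+;;	str	q0, [x0]
+;;
+;; which is why a MEM destination forces operand 1 into a register.)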
+ +(define_expand "mov" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" + " + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_expand "movmisalign" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. */ + if (!register_operand (operands[0], mode) + && !register_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); +}) + +(define_insn "aarch64_simd_dup" + [(set (match_operand:VDQ 0 "register_operand" "=w, w") + (vec_duplicate:VDQ (match_operand: 1 "register_operand" "r, w")))] + "TARGET_SIMD" + "@ + dup\\t%0., %1 + dup\\t%0., %1.[0]" + [(set_attr "type" "neon_from_gp, neon_dup")] +) + +(define_insn "aarch64_simd_dup" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "dup\\t%0., %1.[0]" + [(set_attr "type" "neon_dup")] +) + +(define_insn "aarch64_dup_lane" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand:VALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "dup\\t%0., %1.[%2]"; + } + [(set_attr "type" "neon_dup")] +) + +(define_insn "aarch64_dup_lane_" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand: 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "dup\\t%0., %1.[%2]"; + } + [(set_attr "type" "neon_dup")] +) + +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VD 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: return "ldr\\t%d0, %1"; + case 1: return "str\\t%d1, %0"; + case 2: return "orr\t%0., %1., %1."; + case 3: return "umov\t%0, %1.d[0]"; + case 4: return "ins\t%0.d[0], %1"; + case 5: return "mov\t%0, %1"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], + mode, 64); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\ + neon_logic, neon_to_gp, neon_from_gp,\ + mov_reg, neon_move")] +) + +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VQ 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "ldr\\t%q0, %1"; + case 1: + return "str\\t%q1, %0"; + case 2: + return "orr\t%0., %1., %1."; + case 3: + case 4: + case 5: + return "#"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], mode, 128); + default: + gcc_unreachable (); + } +} + [(set_attr "type" 
"neon_load1_1reg, neon_store1_1reg,\ + neon_logic, multiple, multiple, multiple,\ + neon_move") + (set_attr "length" "4,4,4,8,8,8,4")] +) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (DImode, rdest); + src[0] = gen_rtx_REG (DImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 1); + src[1] = gen_rtx_REG (DImode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) + || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" + [(const_int 0)] +{ + aarch64_split_simd_move (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_split_simd_mov" + [(set (match_operand:VQ 0) + (match_operand:VQ 1))] + "TARGET_SIMD" + { + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GP_REGNUM_P (REGNO (src))) + { + rtx src_low_part = gen_lowpart (mode, src); + rtx src_high_part = gen_highpart (mode, src); + + emit_insn + (gen_move_lo_quad_ (dst, src_low_part)); + emit_insn + (gen_move_hi_quad_ (dst, src_high_part)); + } + + else + { + rtx dst_low_part = gen_lowpart (mode, dst); + rtx dst_high_part = gen_highpart (mode, dst); + rtx lo = aarch64_simd_vect_par_cnst_half (mode, false); + rtx hi = aarch64_simd_vect_par_cnst_half (mode, true); + + emit_insn + (gen_aarch64_simd_mov_from_low (dst_low_part, src, lo)); + emit_insn + (gen_aarch64_simd_mov_from_high (dst_high_part, src, hi)); + } + DONE; + } +) + +(define_insn "aarch64_simd_mov_from_low" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[0]" + [(set_attr "type" "neon_to_gp") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_simd_mov_from_high" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[1]" + [(set_attr "type" "neon_to_gp") + (set_attr "length" "4") + ]) + +(define_insn "orn3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orn\t%0., %2., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "bic3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "bic\t%0., %2., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "add3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "add\t%0., %1., %2." 
+ [(set_attr "type" "neon_add")] +) + +(define_insn "sub3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "sub\t%0., %1., %2." + [(set_attr "type" "neon_sub")] +) + +(define_insn "mul3" + [(set (match_operand:VDQM 0 "register_operand" "=w") + (mult:VDQM (match_operand:VDQM 1 "register_operand" "w") + (match_operand:VDQM 2 "register_operand" "w")))] + "TARGET_SIMD" + "mul\t%0., %1., %2." + [(set_attr "type" "neon_mul_")] +) + +(define_insn "*aarch64_mul3_elt" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (mult:VMUL + (vec_duplicate:VMUL + (vec_select: + (match_operand:VMUL 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mul\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mul__scalar")] +) + +(define_insn "*aarch64_mul3_elt_" + [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w") + (mult:VMUL_CHANGE_NLANES + (vec_duplicate:VMUL_CHANGE_NLANES + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mul\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mul__scalar")] +) + +(define_insn "*aarch64_mul3_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (mult:V2DF + (vec_duplicate:V2DF + (match_operand:DF 2 "register_operand" "w")) + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0.2d, %1.2d, %2.d[0]" + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "*aarch64_mul3_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (mult:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; + } + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "neg2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "neg\t%0., %1." + [(set_attr "type" "neon_neg")] +) + +(define_insn "abs2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "abs\t%0., %1." + [(set_attr "type" "neon_abs")] +) + +(define_insn "abd_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "sabd\t%0., %1., %2." + [(set_attr "type" "neon_abd")] +) + +(define_insn "aba_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))) + (match_operand:VDQ_BHSI 3 "register_operand" "0")))] + "TARGET_SIMD" + "saba\t%0., %1., %2." 
+ [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "fabd_3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (minus:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0., %1., %2." + [(set_attr "type" "neon_fp_abd_")] +) + +(define_insn "*fabd_scalar3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0, %1, %2" + [(set_attr "type" "neon_fp_abd_")] +) + +(define_insn "and3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "and\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "ior3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orr\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "xor3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "eor\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "not\t%0., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "aarch64_simd_vec_set" + [(set (match_operand:VQ_S 0 "register_operand" "=w,w") + (vec_merge:VQ_S + (vec_duplicate:VQ_S + (match_operand: 1 "register_operand" "r,w")) + (match_operand:VQ_S 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.[%p2], %w1"; + case 1: + return "ins\\t%0.[%p2], %1.[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp, neon_ins")] +) + +(define_insn "aarch64_simd_lshr" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "ushr\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_ashr" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "sshr\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_imm_shl" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))] + "TARGET_SIMD" + "shl\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_reg_sshl" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "sshl\t%0., %1., %2." 
+ [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "aarch64_simd_reg_shl_unsigned" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_SIMD" + "ushl\t%0., %1., %2." + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "aarch64_simd_reg_shl_signed" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_SIMD" + "sshl\t%0., %1., %2." + [(set_attr "type" "neon_shift_reg")] +) + +(define_expand "ashl3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount >= 0 && shift_amount < bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_imm_shl (operands[0], + operands[1], + tmp)); + DONE; + } + else + { + operands[2] = force_reg (SImode, operands[2]); + } + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_simd_dup (tmp, + convert_to_mode (mode, + operands[2], + 0))); + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], + tmp)); + DONE; + } + else + FAIL; +} +) + +(define_expand "lshr3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_lshr (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup (tmp1, + convert_to_mode (mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "ashr3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_ashr (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + 
emit_insn (gen_aarch64_simd_dup (tmp1, + convert_to_mode (mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "vashl3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:VDQ 2 "register_operand" "")] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], + operands[2])); + DONE; +}) + +;; Using mode VQ_S as there is no V2DImode neg! +;; Negating individual lanes most certainly offsets the +;; gain from vectorization. +(define_expand "vashr3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (mode); + emit (gen_neg2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], + neg)); + DONE; +}) + +;; DI vector shift +(define_expand "aarch64_ashr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1])); + else + emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. +(define_insn "aarch64_sshr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))] + "TARGET_SIMD" + "sshr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vlshr3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (mode); + emit (gen_neg2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], + neg)); + DONE; +}) + +(define_expand "aarch64_lshr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1])); + else + emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. 
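+;; (Illustrative note, not part of the original file: a right shift by
+;; exactly 64 is valid for the AdvSIMD immediate forms even though the
+;; standard shift patterns reject it; for instance an intrinsic call
+;; such as vshrd_n_u64 (x, 64) is expected to yield 0, which the single
+;; instruction "ushr d0, d1, 64" provides directly.)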
+(define_insn "aarch64_ushr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))] + "TARGET_SIMD" + "ushr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vec_set" + [(match_operand:VQ_S 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_setv2di" + [(set (match_operand:V2DI 0 "register_operand" "=w,w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "register_operand" "r,w")) + (match_operand:V2DI 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.d[%p2], %1"; + case 1: + return "ins\\t%0.d[%p2], %1.d[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp, neon_ins_q")] +) + +(define_expand "vec_setv2di" + [(match_operand:V2DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_set" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_merge:VDQF + (vec_duplicate:VDQF + (match_operand: 1 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2]))); + + operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); + return "ins\t%0.[%p2], %1.[0]"; + } + [(set_attr "type" "neon_ins")] +) + +(define_expand "vec_set" + [(match_operand:VDQF 0 "register_operand" "+w") + (match_operand: 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + + +(define_insn "aarch64_mla" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w")) + (match_operand:VQ_S 1 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0., %2., %3." 
+ [(set_attr "type" "neon_mla_")] +) + +(define_insn "*aarch64_mla_elt" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand:VDQHS 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mla\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "*aarch64_mla_elt_" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mla\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "aarch64_mls" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0") + (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0., %2., %3." + [(set_attr "type" "neon_mla_")] +) + +(define_insn "*aarch64_mls_elt" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand:VDQHS 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mls\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "*aarch64_mls_elt_" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mls\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +;; Max/Min operations. +(define_insn "3" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")))] + "TARGET_SIMD" + "\t%0., %1., %2." + [(set_attr "type" "neon_minmax")] +) + +;; Move into low-half clearing high half to 0. + +(define_insn "move_lo_quad_" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (match_operand: 1 "register_operand" "w,r,r") + (vec_duplicate: (const_int 0))))] + "TARGET_SIMD" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup,fmov,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +;; Move into high-half. 
+ +(define_insn "aarch64_simd_move_hi_quad_" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (vec_select: + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) + (match_operand: 1 "register_operand" "w,r")))] + "TARGET_SIMD" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins") + (set_attr "length" "4")] +) + +(define_expand "move_hi_quad_" + [(match_operand:VQ 0 "register_operand" "") + (match_operand: 1 "register_operand" "")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], + operands[1], p)); + DONE; +}) + +;; Narrowing operations. + +;; For doubles. +(define_insn "aarch64_simd_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\\t%0., %1." + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VDN 1 "register_operand" "") + (match_operand:VDN 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx tempreg = gen_reg_rtx (mode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_ (tempreg, operands[lo])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[hi])); + emit_insn (gen_aarch64_simd_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) + +;; For quads. + +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "+&w") + (vec_concat: + (truncate: (match_operand:VQN 1 "register_operand" "w")) + (truncate: (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + return "xtn\\t%0., %2.\;xtn2\\t%0., %1."; + else + return "xtn\\t%0., %1.\;xtn2\\t%0., %2."; + } + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +;; Widening operations. + +(define_insn "aarch64_simd_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_lo_half" "") + )))] + "TARGET_SIMD" + "shll %0., %1., 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_simd_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_hi_half" "") + )))] + "TARGET_SIMD" + "shll2 %0., %1., 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_simd_vec_unpack_hi_ (operands[0], + operands[1], p)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_vec_unpack_lo_ (operands[0], + operands[1], p)); + DONE; + } +) + +;; Widening arithmetic. 
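+;; A rough C model of the widening multiply-accumulate ("mlal") patterns
+;; below, for the signed, low-half case (illustrative names only, and
+;; endianness is ignored):
+;;
+;;   #include <stdint.h>
+;;   void smlal_lo (int32_t acc[4], const int16_t a[8], const int16_t b[8])
+;;   {
+;;     for (int i = 0; i < 4; i++)
+;;       acc[i] += (int32_t) a[i] * (int32_t) b[i];
+;;   }
+;;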
+ +(define_insn "*aarch64_mlal_lo" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlal_hi" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal2\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl_lo" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl_hi" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl2\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlal" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand: 3 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %1., %2." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %3." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "aarch64_simd_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "mull\\t%0., %1., %2." 
+ [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_vec_mult_lo_ (operands[0], + operands[1], + operands[2], p)); + DONE; + } +) + +(define_insn "aarch64_simd_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "mull2\\t%0., %1., %2." + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_simd_vec_mult_hi_ (operands[0], + operands[1], + operands[2], p)); + DONE; + + } +) + +;; FP vector operations. +;; AArch64 AdvSIMD supports single-precision (32-bit) and +;; double-precision (64-bit) floating-point data types and arithmetic as +;; defined by the IEEE 754-2008 standard. This makes them vectorizable +;; without the need for -ffast-math or -funsafe-math-optimizations. +;; +;; Floating-point operations can raise an exception. Vectorizing such +;; operations are safe because of reasons explained below. +;; +;; ARMv8 permits an extension to enable trapped floating-point +;; exception handling, however this is an optional feature. In the +;; event of a floating-point exception being raised by vectorised +;; code then: +;; 1. If trapped floating-point exceptions are available, then a trap +;; will be taken when any lane raises an enabled exception. A trap +;; handler may determine which lane raised the exception. +;; 2. Alternatively a sticky exception flag is set in the +;; floating-point status register (FPSR). Software may explicitly +;; test the exception flags, in which case the tests will either +;; prevent vectorisation, allowing precise identification of the +;; failing operation, or if tested outside of vectorisable regions +;; then the specific operation and lane are not of interest. + +;; FP arithmetic operations. + +(define_insn "add3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (plus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fadd\\t%0., %1., %2." + [(set_attr "type" "neon_fp_addsub_")] +) + +(define_insn "sub3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fsub\\t%0., %1., %2." + [(set_attr "type" "neon_fp_addsub_")] +) + +(define_insn "mul3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (mult:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0., %1., %2." + [(set_attr "type" "neon_fp_mul_")] +) + +(define_insn "div3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (div:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fdiv\\t%0., %1., %2." 
+ [(set_attr "type" "neon_fp_div_")] +) + +(define_insn "neg2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fneg\\t%0., %1." + [(set_attr "type" "neon_fp_neg_")] +) + +(define_insn "abs2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fabs\\t%0., %1." + [(set_attr "type" "neon_fp_abs_")] +) + +(define_insn "fma4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0., %1., %2." + [(set_attr "type" "neon_fp_mla_")] +) + +(define_insn "*aarch64_fma4_elt" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (vec_duplicate:VDQF + (vec_select: + (match_operand:VDQF 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 3 "register_operand" "w") + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "fmla\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fma4_elt_" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (vec_duplicate:VDQSF + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 3 "register_operand" "w") + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "fmla\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 2 "register_operand" "w") + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w") + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "fnma4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (neg:VDQF + (match_operand:VDQF 2 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0., %1., %2." 
+ [(set_attr "type" "neon_fp_mla_")] +) + +(define_insn "*aarch64_fnma4_elt" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (neg:VDQF + (match_operand:VDQF 3 "register_operand" "w")) + (vec_duplicate:VDQF + (vec_select: + (match_operand:VDQF 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "fmls\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fnma4_elt_" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (neg:VDQSF + (match_operand:VDQSF 3 "register_operand" "w")) + (vec_duplicate:VDQSF + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "fmls\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fnma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (neg:V2DF + (match_operand:V2DF 2 "register_operand" "w")) + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fnma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (neg:DF + (match_operand:DF 3 "register_operand" "w")) + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +;; Vector versions of the floating-point frint patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +(define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + FRINT))] + "TARGET_SIMD" + "frint\\t%0., %1." + [(set_attr "type" "neon_fp_round_")] +) + +;; Vector versions of the fcvt standard patterns. +;; Expands to lbtrunc, lround, lceil, lfloor +(define_insn "l2" + [(set (match_operand: 0 "register_operand" "=w") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand" "w")] + FCVT)))] + "TARGET_SIMD" + "fcvt\\t%0., %1." + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "ftrunc2" + [(set (match_operand:VDQF 0 "register_operand") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ))] + "TARGET_SIMD" + {}) + +(define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FLOATUORS:VDQF + (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "cvtf\\t%0., %1." + [(set_attr "type" "neon_int_to_fp_")] +) + +;; Conversions between vectors of floats and doubles. 
+;; Contains a mix of patterns to match standard pattern names +;; and those for intrinsics. + +;; Float widening operations. + +(define_insn "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 0) (const_int 1)]) + )))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "aarch64_float_extend_lo_v2df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (match_operand:V2SF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 2) (const_int 3)]) + )))] + "TARGET_SIMD" + "fcvtl2\\t%0.2d, %1.4s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +;; Float narrowing operations. + +(define_insn "aarch64_float_truncate_lo_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=w") + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtn\\t%0.2s, %1.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_insn "aarch64_float_truncate_hi_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "0") + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fcvtn2\\t%0.4s, %2.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_expand "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand")) + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); + emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], + tmp, operands[hi])); + DONE; + } +) + +(define_expand "vec_pack_trunc_df" + [(set (match_operand:V2SF 0 "register_operand") + (vec_concat:V2SF + (float_truncate:SF + (match_operand:DF 1 "register_operand")) + (float_truncate:SF + (match_operand:DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); + emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); + emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); + DONE; + } +) + +(define_insn "aarch64_vmls" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") + (mult:VDQF (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "w"))))] + "TARGET_SIMD" + "fmls\\t%0., %2., %3." + [(set_attr "type" "neon_fp_mla__scalar")] +) + +;; FP Max/Min +;; Max/Min are introduced by idiom recognition by GCC's mid-end. An +;; expression like: +;; a = (b < c) ? b : c; +;; is idiom-matched as MIN_EXPR only if -ffinite-math-only is enabled +;; either explicitly or indirectly via -ffast-math. +;; +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. 
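+;; For example, with -ffast-math the loop below is recognised as a
+;; MIN_EXPR and can be vectorised using these patterns (illustrative C,
+;; not part of the port):
+;;
+;;   void vmin (float *restrict d, const float *a, const float *b, int n)
+;;   {
+;;     for (int i = 0; i < n; i++)
+;;       d[i] = a[i] < b[i] ? a[i] : b[i];
+;;   }
+;;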
+;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. + +(define_insn "3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fnm\\t%0., %1., %2." + [(set_attr "type" "neon_fp_minmax_")] +) + +(define_insn "3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + FMAXMIN_UNS))] + "TARGET_SIMD" + "\\t%0., %1., %2." + [(set_attr "type" "neon_fp_minmax_")] +) + +;; 'across lanes' add. + +(define_insn "reduc_plus_" + [(set (match_operand:VDQV 0 "register_operand" "=w") + (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "add\\t%0, %1." + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "reduc_plus_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "addp\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "reduc_splus_" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%0, %1." + [(set_attr "type" "neon_fp_reduc_add_")] +) + +(define_insn "aarch64_addpv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%0.4s, %1.4s, %1.4s" + [(set_attr "type" "neon_fp_reduc_add_s_q")] +) + +(define_expand "reduc_splus_v4sf" + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] + UNSPEC_FADDV))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1])); + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0])); + DONE; +}) + +(define_insn "clz2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0., %1." + [(set_attr "type" "neon_cls")] +) + +;; 'across lanes' max and min ops. + +(define_insn "reduc__" + [(set (match_operand:VDQV_S 0 "register_operand" "=w") + (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "v\\t%0, %1." + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "reduc__v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "p\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "reduc__" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "p\\t%0, %1." + [(set_attr "type" "neon_fp_reduc_minmax_")] +) + +(define_insn "reduc__v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "v\\t%s0, %1.4s" + [(set_attr "type" "neon_fp_reduc_minmax_s_q")] +) + +;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register +;; allocation. 
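+;; In C terms the operation is a per-bit select (an illustrative sketch,
+;; not part of the port):
+;;
+;;   #include <stdint.h>
+;;   uint64_t bsl64 (uint64_t mask, uint64_t a, uint64_t b)
+;;   {
+;;     return (mask & a) | (~mask & b);
+;;   }
+;;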
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which +;; to select. +;; +;; Thus our BSL is of the form: +;; op0 = bsl (mask, op2, op3) +;; We can use any of: +;; +;; if (op0 = mask) +;; bsl mask, op1, op2 +;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) +;; bit op0, op2, mask +;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) +;; bif op0, op1, mask + +(define_insn "aarch64_simd_bsl_internal" + [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") + (ior:VALLDIF + (and:VALLDIF + (match_operand: 1 "register_operand" " 0,w,w") + (match_operand:VALLDIF 2 "register_operand" " w,w,0")) + (and:VALLDIF + (not: + (match_dup: 1)) + (match_operand:VALLDIF 3 "register_operand" " w,0,w")) + ))] + "TARGET_SIMD" + "@ + bsl\\t%0., %2., %3. + bit\\t%0., %2., %1. + bif\\t%0., %3., %1." + [(set_attr "type" "neon_bsl")] +) + +(define_expand "aarch64_simd_bsl" + [(match_operand:VALLDIF 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VALLDIF 2 "register_operand") + (match_operand:VALLDIF 3 "register_operand")] + "TARGET_SIMD" +{ + /* We can't alias operands together if they have different modes. */ + operands[1] = gen_lowpart (mode, operands[1]); + emit_insn (gen_aarch64_simd_bsl_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_vcond_internal" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0, has_zero_imm_form = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (mode); + + switch (GET_CODE (operands[3])) + { + case LE: + case LT: + case NE: + inverse = 1; + /* Fall through. */ + case GE: + case GT: + case EQ: + has_zero_imm_form = 1; + break; + case LEU: + case LTU: + inverse = 1; + break; + default: + break; + } + + if (!REG_P (operands[5]) + && (operands[5] != CONST0_RTX (mode) || !has_zero_imm_form)) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case LT: + case GE: + emit_insn (gen_aarch64_cmge (mask, operands[4], operands[5])); + break; + + case LE: + case GT: + emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); + break; + + case LTU: + case GEU: + emit_insn (gen_aarch64_cmgeu (mask, operands[4], operands[5])); + break; + + case LEU: + case GTU: + emit_insn (gen_aarch64_cmgtu (mask, operands[4], operands[5])); + break; + + case NE: + case EQ: + emit_insn (gen_aarch64_cmeq (mask, operands[4], operands[5])); + break; + + default: + gcc_unreachable (); + } + + if (inverse) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. 
*/ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "aarch64_vcond_internal" + [(set (match_operand:VDQF_COND 0 "register_operand") + (if_then_else:VDQF + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand:VDQF_COND 1 "nonmemory_operand") + (match_operand:VDQF_COND 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0; + int use_zero_form = 0; + int swap_bsl_operands = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (mode); + rtx tmp = gen_reg_rtx (mode); + + rtx (*base_comparison) (rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx); + + switch (GET_CODE (operands[3])) + { + case GE: + case GT: + case LE: + case LT: + case EQ: + if (operands[5] == CONST0_RTX (mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_aarch64_cmge; + complimentary_comparison = gen_aarch64_cmgt; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ + case GT: + case UNGT: + base_comparison = gen_aarch64_cmgt; + complimentary_comparison = gen_aarch64_cmge; + break; + case EQ: + case NE: + case UNEQ: + base_comparison = gen_aarch64_cmeq; + complimentary_comparison = gen_aarch64_cmeq; + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case LE: + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_aarch64_cmlt; + break; + case LE: + base_comparison = gen_aarch64_cmle; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + break; + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* FCM returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. + + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + + swap_bsl_operands = 1; + break; + case UNEQ: + /* We check (a > b || b > a). 
combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. */ + + emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); + emit_insn (gen_aarch64_cmgt (tmp, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_aarch64_cmgt (tmp, operands[4], operands[5])); + emit_insn (gen_aarch64_cmge (mask, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); + break; + default: + gcc_unreachable (); + } + + if (swap_bsl_operands) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. */ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "vcond" + [(set (match_operand:VALL 0 "register_operand") + (if_then_else:VALL + (match_operator 3 "comparison_operator" + [(match_operand:VALL 4 "register_operand") + (match_operand:VALL 5 "nonmemory_operand")]) + (match_operand:VALL 1 "nonmemory_operand") + (match_operand:VALL 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcond" + [(set (match_operand: 0 "register_operand") + (if_then_else: + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand: 1 "nonmemory_operand") + (match_operand: 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal ( + operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +;; Patterns for AArch64 SIMD Intrinsics. + +(define_expand "aarch64_create" + [(match_operand:VD_RE 0 "register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_SIMD" +{ + rtx src = gen_lowpart (mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +;; Lane extraction with sign extension to general purpose register. 
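+;; A rough C model of the sign- and zero-extending lane reads handled by
+;; the patterns below (illustrative names only):
+;;
+;;   #include <stdint.h>
+;;   int64_t  get_lane_s16 (const int16_t v[8], int lane)   /* smov */
+;;   { return (int64_t) v[lane]; }
+;;   uint32_t get_lane_u16 (const uint16_t v[8], int lane)  /* umov */
+;;   { return (uint32_t) v[lane]; }
+;;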
+(define_insn "*aarch64_get_lane_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI + (vec_select: + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "smov\\t%0, %1.[%2]"; + } + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "*aarch64_get_lane_zero_extendsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "umov\\t%w0, %1.[%2]"; + } + [(set_attr "type" "neon_to_gp")] +) + +(define_expand "aarch64_be_checked_get_lane" + [(match_operand: 0 "aarch64_simd_nonimmediate_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + emit_insn (gen_aarch64_get_lane (operands[0], + operands[1], + operands[2])); + DONE; + } +) + +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. +(define_insn "aarch64_get_lane" + [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select: + (match_operand:VALL 1 "register_operand" "w, w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + switch (which_alternative) + { + case 0: + return "umov\\t%0, %1.[%2]"; + case 1: + return "dup\\t%0, %1.[%2]"; + case 2: + return "st1\\t{%1.}[%2], %0"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_to_gp, neon_dup, neon_store1_one_lane")] +) + +(define_expand "aarch64_get_lanedi" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8qi" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4hi" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2si" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretdi" + [(match_operand:DI 0 "register_operand" "") + (match_operand:VD_RE 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], 
operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8hi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4si" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2di" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. + +(define_insn "*aarch64_combinez" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (match_operand:VDIC 1 "register_operand" "w") + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] + "TARGET_SIMD" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move")] +) + +(define_insn_and_split "aarch64_combine" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_expand "aarch64_simd_combine" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_ (operands[0], operands[2])); + DONE; + } +[(set_attr "type" "multiple")] +) + +;; l. + +(define_insn "aarch64_l_hi_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "l2\t%0., %1., %2." + [(set_attr "type" "neon__long")] +) + +(define_insn "aarch64_l_lo_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "l\t%0., %1., %2." 
+ [(set_attr "type" "neon__long")] +) + + +(define_expand "aarch64_saddl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_saddl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_uaddl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_ssubl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_ssubl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_usubl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_insn "aarch64_l" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "l %0., %1., %2." + [(set_attr "type" "neon__long")] +) + +;; w. + +(define_insn "aarch64_w" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "w\\t%0., %1., %2." + [(set_attr "type" "neon__widen")] +) + +(define_insn "aarch64_w2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] + "TARGET_SIMD" + "w2\\t%0., %1., %2." 
+ [(set_attr "type" "neon__widen")] +) + +(define_expand "aarch64_saddw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_saddw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_uaddw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + + +(define_expand "aarch64_ssubw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_ssubw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_usubw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; h. + +(define_insn "aarch64_h" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")] + HADDSUB))] + "TARGET_SIMD" + "h\\t%0., %1., %2." + [(set_attr "type" "neon__halve")] +) + +;; hn. + +(define_insn "aarch64_hn" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "register_operand" "w")] + ADDSUBHN))] + "TARGET_SIMD" + "hn\\t%0., %1., %2." + [(set_attr "type" "neon__halve_narrow_q")] +) + +(define_insn "aarch64_hn2" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 "register_operand" "w")] + ADDSUBHN2))] + "TARGET_SIMD" + "hn2\\t%0., %2., %3." + [(set_attr "type" "neon__halve_narrow_q")] +) + +;; pmul. + +(define_insn "aarch64_pmul" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_PMUL))] + "TARGET_SIMD" + "pmul\\t%0., %1., %2." 
+ [(set_attr "type" "neon_mul_")] +) + +;; q + +(define_insn "aarch64_" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")))] + "TARGET_SIMD" + "\\t%0, %1, %2" + [(set_attr "type" "neon_")] +) + +;; suqadd and usqadd + +(define_insn "aarch64_qadd" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") + (match_operand:VSDQ_I 2 "register_operand" "w")] + USSUQADD))] + "TARGET_SIMD" + "qadd\\t%0, %2" + [(set_attr "type" "neon_qadd")] +) + +;; sqmovun + +(define_insn "aarch64_sqmovun" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + UNSPEC_SQXTUN))] + "TARGET_SIMD" + "sqxtun\\t%0, %1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; sqmovn and uqmovn + +(define_insn "aarch64_qmovn" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + SUQMOVN))] + "TARGET_SIMD" + "qxtn\\t%0, %1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; q + +(define_insn "aarch64_s" + [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") + (UNQOPS:VSDQ_I_BHSI + (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "s\\t%0, %1" + [(set_attr "type" "neon_")] +) + +;; sqdmulh. + +(define_insn "aarch64_sqdmulh" + [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") + (unspec:VSDQ_HSI + [(match_operand:VSDQ_HSI 1 "register_operand" "w") + (match_operand:VSDQ_HSI 2 "register_operand" "w")] + VQDMULH))] + "TARGET_SIMD" + "sqdmulh\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul_")] +) + +;; sqdmulh_lane + +(define_insn "aarch64_sqdmulh_lane" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0., %1., %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +(define_insn "aarch64_sqdmulh_laneq" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0., %1., %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +(define_insn "aarch64_sqdmulh_lane" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0, %1, %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +;; vqdml[sa]l + +(define_insn "aarch64_sqdmll" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 
"register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__long")] +) + +;; vqdml[sa]l_lane + +(define_insn "aarch64_sqdmll_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlal_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +;; vqdml[sa]l_n + +(define_insn "aarch64_sqdmll_n" + 
[(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" "")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +;; sqdml[as]l2 + +(define_insn "aarch64_sqdmll2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll2\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlal2_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlsl2_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +;; vqdml[sa]l2_lane + +(define_insn "aarch64_sqdmll2_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll2\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlal2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + operands[2], 
operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_insn "aarch64_sqdmll2_n_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (match_operand: 3 "register_operand" "")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll2\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlal2_n_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlsl2_n_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull + +(define_insn "aarch64_sqdmull" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 1 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w"))) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul__long")] +) + +;; vqdmull_lane + +(define_insn "aarch64_sqdmull_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + 
(ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VSD_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_sqdmull_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VD_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmull_lane_internal + (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +;; vqdmull_n + +(define_insn "aarch64_sqdmull_n" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 2 "register_operand" ""))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%0, %1, %2.[0]" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +;; vqdmull2 + + + +(define_insn "aarch64_sqdmull2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_dup 3))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmull2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vqdmull2_lane + +(define_insn "aarch64_sqdmull2_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull2\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS 
(mode) / 2); + emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmull2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull2_n + +(define_insn "aarch64_sqdmull2_n_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (match_operand: 2 "register_operand" ""))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%0, %1, %2.[0]" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmull2_n_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vshl + +(define_insn "aarch64_shl" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI + [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:VSDQ_I_DI 2 "register_operand" "w")] + VSHL))] + "TARGET_SIMD" + "shl\\t%0, %1, %2"; + [(set_attr "type" "neon_shift_reg")] +) + + +;; vqshl + +(define_insn "aarch64_qshl" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I + [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")] + VQSHL))] + "TARGET_SIMD" + "qshl\\t%0, %1, %2"; + [(set_attr "type" "neon_sat_shift_reg")] +) + +;; vshll_n + +(define_insn "aarch64_shll_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VDW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll\\t%0., %1., %2\"; + } + else { + return \"shll\\t%0., %1., %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vshll_high_n + +(define_insn "aarch64_shll2_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll2\\t%0., %1., %2\"; + } + else { + return \"shll2\\t%0., %1., %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vrshr_n + +(define_insn "aarch64_shr_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VRSHR_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width 
+ 1); + return \"shr\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm")] +) + +;; v(r)sra_n + +(define_insn "aarch64_sra_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSRA))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1, bit_width + 1); + return \"sra\\t%0, %2, %3\";" + [(set_attr "type" "neon_shift_acc")] +) + +;; vsi_n + +(define_insn "aarch64_si_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSLRI))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1 - , + bit_width - + 1); + return \"si\\t%0, %2, %3\";" + [(set_attr "type" "neon_shift_imm")] +) + +;; vqshl(u) + +(define_insn "aarch64_qshl_n" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VQSHL_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width); + return \"qshl\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm")] +) + + +;; vq(r)shr(u)n_n + +(define_insn "aarch64_qshrn_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VQSHRN_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); + return \"qshrn\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + + +;; cm(eq|ge|gt|lt|le) +;; Note, we have constraints for Dz and Z as different expanders +;; have different ideas of what should be passed to this pattern. + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w,w") + (neg: + (COMPARISONS: + (match_operand:VDQ 1 "register_operand" "w,w") + (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz") + )))] + "TARGET_SIMD" + "@ + cm\t%0, %, % + cm\t%0, %1, #0" + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + +(define_insn_and_split "aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,w,r") + (neg:DI + (COMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm\t%d0, %d, %d + cm\t%d0, %d1, #0 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. 
*/ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] +) + +;; cm(hs|hi) + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (UCOMPARISONS: + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w") + )))] + "TARGET_SIMD" + "cm\t%0, %, %" + [(set_attr "type" "neon_compare")] +) + +(define_insn_and_split "aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (UCOMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm\t%d0, %d, %d + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = CCmode; + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + +;; cmtst + +(define_insn "aarch64_cmtst" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (ne: + (and:VDQ + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")) + (vec_duplicate: (const_int 0)))))] + "TARGET_SIMD" + "cmtst\t%0, %1, %2" + [(set_attr "type" "neon_tst")] +) + +(define_insn_and_split "aarch64_cmtstdi" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (ne:DI + (and:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "register_operand" "w,r")) + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cmtst\t%d0, %d1, %d2 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); + enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); + rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); + rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_tst")] +) + +;; fcm(eq|ge|gt|le|lt) + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w,w") + (neg: + (COMPARISONS: + (match_operand:VALLF 1 "register_operand" "w,w") + (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") + )))] + "TARGET_SIMD" + "@ + fcm\t%0, %, % + fcm\t%0, %1, 0" + [(set_attr "type" "neon_fp_compare_")] +) + +;; fac(ge|gt) +;; Note we can also handle what would be fac(le|lt) by +;; generating fac(ge|gt). 
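For reference, the per-lane semantics behind these absolute-value comparisons can be modelled in scalar C. This is an illustrative sketch only (the helper names are made up and are not part of this port); it also shows why only the ge/gt forms need a pattern: fac(le|lt) is simply fac(ge|gt) with the operands swapped.

#include <math.h>
#include <stdint.h>

/* Per-lane model of facge/facgt: compare absolute values and return an
   all-ones or all-zero mask, as the vector instructions do.  */
static uint32_t
facge_lane (float a, float b)
{
  return fabsf (a) >= fabsf (b) ? 0xffffffffu : 0u;
}

static uint32_t
facgt_lane (float a, float b)
{
  return fabsf (a) > fabsf (b) ? 0xffffffffu : 0u;
}

/* faclt_lane (a, b) would be facgt_lane (b, a), and facle_lane (a, b)
   would be facge_lane (b, a), so no separate patterns are required.  */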
+ +(define_insn "*aarch64_fac" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (FAC_COMPARISONS: + (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) + (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) + )))] + "TARGET_SIMD" + "fac\t%0, %, %" + [(set_attr "type" "neon_fp_compare_")] +) + +;; addp + +(define_insn "aarch64_addp" + [(set (match_operand:VD_BHSI 0 "register_operand" "=w") + (unspec:VD_BHSI + [(match_operand:VD_BHSI 1 "register_operand" "w") + (match_operand:VD_BHSI 2 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%0, %1, %2" + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "aarch64_addpdi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:V2DI 1 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%d0, %1.2d" + [(set_attr "type" "neon_reduc_add")] +) + +;; sqrt + +(define_insn "sqrt2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fsqrt\\t%0., %1." + [(set_attr "type" "neon_fp_sqrt_")] +) + +;; Patterns for vector struct loads and stores. + +(define_insn "vec_load_lanesoi" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD2))] + "TARGET_SIMD" + "ld2\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load2_2reg")] +) + +(define_insn "vec_store_lanesoi" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:OI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1. - %T1.}, %0" + [(set_attr "type" "neon_store2_2reg")] +) + +(define_insn "vec_load_lanesci" + [(set (match_operand:CI 0 "register_operand" "=w") + (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3))] + "TARGET_SIMD" + "ld3\\t{%S0. - %U0.}, %1" + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "vec_store_lanesci" + [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:CI [(match_operand:CI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1. - %U1.}, %0" + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "vec_load_lanesxi" + [(set (match_operand:XI 0 "register_operand" "=w") + (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4))] + "TARGET_SIMD" + "ld4\\t{%S0. - %V0.}, %1" + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "vec_store_lanesxi" + [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:XI [(match_operand:XI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1. - %V1.}, %0" + [(set_attr "type" "neon_store4_4reg")] +) + +;; Reload patterns for AdvSIMD register list operands. 
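The splits below decompose a move of an OI/CI/XI register tuple (two, three or four vector registers) into single-register TFmode moves, and aarch64_simd_disambiguate_copy has to order those moves so that an overlapping destination range does not clobber source registers that are still to be read. A hedged sketch of that ordering idea follows, assuming consecutively numbered registers; the helper is hypothetical and is not the GCC implementation.

/* Copy COUNT consecutive registers from SRC_BASE to DST_BASE without
   clobbering unread sources when the two ranges overlap: if the
   destination starts above the source, copy from the highest register
   downwards, otherwise copy upwards.  */
static void
copy_reg_tuple (unsigned dst_base, unsigned src_base, unsigned count,
                void (*move_one) (unsigned dst, unsigned src))
{
  if (dst_base > src_base)
    for (unsigned i = count; i-- > 0; )
      move_one (dst_base + i, src_base + i);    /* high to low */
  else
    for (unsigned i = 0; i < count; i++)
      move_one (dst_base + i, src_base + i);    /* low to high */
}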
+ +(define_expand "mov" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (mode, operands[1]); + } +}) + +(define_insn "*aarch64_mov" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + +{ + switch (which_alternative) + { + case 0: return "#"; + case 1: return "st1\\t{%S1.16b - %1.16b}, %0"; + case 2: return "ld1\\t{%S0.16b - %0.16b}, %1"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_move,neon_store_reg_q,\ + neon_load_reg_q") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + +(define_insn "aarch64_be_ld1" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%0}, %1" + [(set_attr "type" "neon_load1_1reg")] +) + +(define_insn "aarch64_be_st1" + [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")] + UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%1}, %0" + [(set_attr "type" "neon_store1_1reg")] +) + +(define_split + [(set (match_operand:OI 0 "register_operand" "") + (match_operand:OI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "register_operand" "") + (match_operand:CI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + + aarch64_simd_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "register_operand" "") + (match_operand:XI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + dest[3] = gen_rtx_REG (TFmode, rdest + 3); + src[3] = gen_rtx_REG (TFmode, rsrc + 3); + + aarch64_simd_disambiguate_copy (operands, dest, src, 4); +}) + +(define_insn "aarch64_ld2_dreg" + [(set 
(match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld2\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load2_2reg")] +) + +(define_insn "aarch64_ld2_dreg" + [(set (match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD2) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %T0.1d}, %1" + [(set_attr "type" "neon_load1_2reg")] +) + +(define_insn "aarch64_ld3_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld3\\t{%S0. - %U0.}, %1" + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "aarch64_ld3_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %U0.1d}, %1" + [(set_attr "type" "neon_load1_3reg")] +) + +(define_insn "aarch64_ld4_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0)))) + (vec_concat: + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))))) 0))] + "TARGET_SIMD" + "ld4\\t{%S0. 
- %V0.}, %1" + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "aarch64_ld4_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0))) + (vec_concat: + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %V0.1d}, %1" + [(set_attr "type" "neon_load1_4reg")] +) + +(define_expand "aarch64_ld" + [(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_aarch64_ld_dreg (operands[0], mem)); + DONE; +}) + +(define_expand "aarch64_ld1" + [(match_operand:VALL 0 "register_operand") + (match_operand:DI 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_ld1 (operands[0], mem)); + else + emit_move_insn (operands[0], mem); + DONE; +}) + +(define_expand "aarch64_ld" + [(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_vec_load_lanes (operands[0], mem)); + DONE; +}) + +;; Expanders for builtins to extract vector registers from large +;; opaque integer modes. + +;; D-register list. + +(define_expand "aarch64_get_dreg" + [(match_operand:VDC 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + rtx temp = gen_reg_rtx (mode); + int offset = part * 16; + + emit_move_insn (temp, gen_rtx_SUBREG (mode, operands[1], offset)); + emit_move_insn (operands[0], gen_lowpart (mode, temp)); + DONE; +}) + +;; Q-register list. + +(define_expand "aarch64_get_qreg" + [(match_operand:VQ 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + int offset = part * 16; + + emit_move_insn (operands[0], + gen_rtx_SUBREG (mode, operands[1], offset)); + DONE; +}) + +;; Permuted-store expanders for neon intrinsics. 
+ +;; Permute instructions + +;; vec_perm support + +(define_expand "vec_perm_const" + [(match_operand:VALL 0 "register_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "register_operand") + (match_operand: 3)] + "TARGET_SIMD" +{ + if (aarch64_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "vec_perm" + [(match_operand:VB 0 "register_operand") + (match_operand:VB 1 "register_operand") + (match_operand:VB 2 "register_operand") + (match_operand:VB 3 "register_operand")] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" +{ + aarch64_expand_vec_perm (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_insn "aarch64_tbl1" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0., {%1.16b}, %2." + [(set_attr "type" "neon_tbl1")] +) + +;; Two source registers. + +(define_insn "aarch64_tbl2v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" + [(set_attr "type" "neon_tbl2_q")] +) + +(define_insn_and_split "aarch64_combinev16qi" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_CONCAT))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_combinev16qi (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_insn "aarch64_" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") + (match_operand:VALL 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SIMD" + "\\t%0., %1., %2." + [(set_attr "type" "neon_permute")] +) + +(define_insn "aarch64_st2_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1. - %T1.}, %0" + [(set_attr "type" "neon_store2_2reg")] +) + +(define_insn "aarch64_st2_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %T1.1d}, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +(define_insn "aarch64_st3_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1. - %U1.}, %0" + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "aarch64_st3_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %U1.1d}, %0" + [(set_attr "type" "neon_store1_3reg")] +) + +(define_insn "aarch64_st4_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1. 
- %V1.}, %0" + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "aarch64_st4_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %V1.1d}, %0" + [(set_attr "type" "neon_store1_4reg")] +) + +(define_expand "aarch64_st" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_aarch64_st_dreg (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_vec_store_lanes (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st1" + [(match_operand:DI 0 "register_operand") + (match_operand:VALL 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_st1 (mem, operands[1])); + else + emit_move_insn (mem, operands[1]); + DONE; +}) + +;; Expander for builtins to insert vector registers into large +;; opaque integer modes. + +;; Q-register list. We don't need a D-reg inserter as we zero +;; extend them in arm_neon.h and insert the resulting Q-regs. + +(define_expand "aarch64_set_qreg" + [(match_operand:VSTRUCT 0 "register_operand" "+w") + (match_operand:VSTRUCT 1 "register_operand" "0") + (match_operand:VQ 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[3]); + int offset = part * 16; + + emit_move_insn (operands[0], operands[1]); + emit_move_insn (gen_rtx_SUBREG (mode, operands[0], offset), + operands[2]); + DONE; +}) + +;; Standard pattern name vec_init. + +(define_expand "vec_init" + [(match_operand:VALL 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_insn "*aarch64_simd_ld1r" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (vec_duplicate:VALLDI + (match_operand: 1 "aarch64_simd_struct_operand" "Utv")))] + "TARGET_SIMD" + "ld1r\\t{%0.}, %1" + [(set_attr "type" "neon_load1_all_lanes")] +) + +(define_insn "aarch64_frecpe" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + UNSPEC_FRECPE))] + "TARGET_SIMD" + "frecpe\\t%0., %1." + [(set_attr "type" "neon_fp_recpe_")] +) + +(define_insn "aarch64_frecp" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_SIMD" + "frecp\\t%0, %1" + [(set_attr "type" "neon_fp_recp_")] +) + +(define_insn "aarch64_frecps" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_SIMD" + "frecps\\t%0, %1, %2" + [(set_attr "type" "neon_fp_recps_")] +) + +;; Standard pattern name vec_extract. 
+ +(define_expand "vec_extract" + [(match_operand: 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" +{ + emit_insn + (gen_aarch64_get_lane (operands[0], operands[1], operands[2])); + DONE; +}) + +;; aes + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "w")] + CRYPTO_AES))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %2.16b" + [(set_attr "type" "crypto_aes")] +) + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] + CRYPTO_AESMC))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %1.16b" + [(set_attr "type" "crypto_aes")] +) + +;; sha1 + +(define_insn "aarch64_crypto_sha1hsi" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 + "register_operand" "w")] + UNSPEC_SHA1H))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1h\\t%s0, %s1" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA1SU1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su1\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1\\t%q0, %s2, %3.4s" + [(set_attr "type" "crypto_sha1_slow")] +) + +(define_insn "aarch64_crypto_sha1su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA1SU0))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha1_xor")] +) + +;; sha256 + +(define_insn "aarch64_crypto_sha256hv4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA256))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha256h\\t%q0, %q2, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +(define_insn "aarch64_crypto_sha256su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA256SU0))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su0\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha256_fast")] +) + +(define_insn "aarch64_crypto_sha256su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA256SU1))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su1\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +;; pmull + +(define_insn "aarch64_crypto_pmulldi" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI 
[(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_PMULL))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull\\t%0.1q, %1.1d, %2.1d" + [(set_attr "type" "neon_mul_d_long")] +) + +(define_insn "aarch64_crypto_pmullv2di" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") + (match_operand:V2DI 2 "register_operand" "w")] + UNSPEC_PMULL2))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull2\\t%0.1q, %1.2d, %2.2d" + [(set_attr "type" "neon_mul_d_long")] +) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-tune.md b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md new file mode 100644 index 000000000..b7e40e0b5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md @@ -0,0 +1,5 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from aarch64-cores.def +(define_attr "tune" + "cortexa53,cortexa15,cortexa57cortexa53" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c new file mode 100644 index 000000000..ebd58c007 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -0,0 +1,8544 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "insn-codes.h" +#include "rtl.h" +#include "insn-attr.h" +#include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "regs.h" +#include "df.h" +#include "hard-reg-set.h" +#include "output.h" +#include "expr.h" +#include "reload.h" +#include "toplev.h" +#include "target.h" +#include "target-def.h" +#include "targhooks.h" +#include "ggc.h" +#include "function.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "optabs.h" +#include "dwarf2.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" +#include "config/arm/aarch-cost-tables.h" + +/* Defined for convenience. */ +#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + +/* Classifies an address. + + ADDRESS_REG_IMM + A simple base register plus immediate offset. + + ADDRESS_REG_WB + A base register indexed by immediate offset with writeback. + + ADDRESS_REG_REG + A base register indexed by (optionally scaled) register. + + ADDRESS_REG_UXTW + A base register indexed by (optionally scaled) zero-extended register. + + ADDRESS_REG_SXTW + A base register indexed by (optionally scaled) sign-extended register. 
+ + ADDRESS_LO_SUM + A LO_SUM rtx with a base register and "LO12" symbol relocation. + + ADDRESS_SYMBOLIC: + A constant symbolic address, in pc-relative literal pool. */ + +enum aarch64_address_type { + ADDRESS_REG_IMM, + ADDRESS_REG_WB, + ADDRESS_REG_REG, + ADDRESS_REG_UXTW, + ADDRESS_REG_SXTW, + ADDRESS_LO_SUM, + ADDRESS_SYMBOLIC +}; + +struct aarch64_address_info { + enum aarch64_address_type type; + rtx base; + rtx offset; + int shift; + enum aarch64_symbol_type symbol_type; +}; + +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; + bool msl; +}; + +/* The current code model. */ +enum aarch64_code_model aarch64_cmodel; + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS 1 +#endif + +static bool aarch64_lra_p (void); +static bool aarch64_composite_type_p (const_tree, enum machine_mode); +static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, + const_tree, + enum machine_mode *, int *, + bool *); +static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_override_options_after_change (void); +static bool aarch64_vector_mode_supported_p (enum machine_mode); +static unsigned bit_count (unsigned HOST_WIDE_INT); +static bool aarch64_const_vec_all_same_int_p (rtx, + HOST_WIDE_INT, HOST_WIDE_INT); + +static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); + +/* The processor for which instructions should be scheduled. */ +enum aarch64_processor aarch64_tune = cortexa53; + +/* The current tuning set. */ +const struct tune_params *aarch64_tune_params; + +/* Mask to specify which instructions we are allowed to generate. */ +unsigned long aarch64_isa_flags = 0; + +/* Mask to specify which instruction scheduling options should be used. */ +unsigned long aarch64_tune_flags = 0; + +/* Tuning parameters. */ + +#if HAVE_DESIGNATED_INITIALIZERS +#define NAMED_PARAM(NAME, VAL) .NAME = (VAL) +#else +#define NAMED_PARAM(NAME, VAL) (VAL) +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_addrcost_table generic_addrcost_table = +{ + NAMED_PARAM (pre_modify, 0), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), + NAMED_PARAM (register_extend, 0), + NAMED_PARAM (imm_offset, 0) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_regmove_cost generic_regmove_cost = +{ + NAMED_PARAM (GP2GP, 1), + NAMED_PARAM (GP2FP, 2), + NAMED_PARAM (FP2GP, 2), + /* We currently do not provide direct support for TFmode Q->Q move. + Therefore we need to raise the cost above 2 in order to have + reload handle the situation. */ + NAMED_PARAM (FP2FP, 4) +}; + +/* Generic costs for vector insn classes. 
*/ +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_vector_cost generic_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 1), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 1), + NAMED_PARAM (vec_to_scalar_cost, 1), + NAMED_PARAM (scalar_to_vec_cost, 1), + NAMED_PARAM (vec_align_load_cost, 1), + NAMED_PARAM (vec_unalign_load_cost, 1), + NAMED_PARAM (vec_unalign_store_cost, 1), + NAMED_PARAM (vec_store_cost, 1), + NAMED_PARAM (cond_taken_branch_cost, 3), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct tune_params generic_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa53_tunings = +{ + &cortexa53_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa57_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 3) +}; + +/* A processor implementing AArch64. */ +struct processor +{ + const char *const name; + enum aarch64_processor core; + const char *arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + +/* Processor cores implementing AArch64. */ +static const struct processor all_cores[] = +{ +#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, +#include "aarch64-cores.def" +#undef AARCH64_CORE + {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Architectures implementing AArch64. */ +static const struct processor all_architectures[] = +{ +#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ + {NAME, CORE, #ARCH, FLAGS, NULL}, +#include "aarch64-arches.def" +#undef AARCH64_ARCH + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Target specification. These are populated as commandline arguments + are processed, or NULL if not specified. */ +static const struct processor *selected_arch; +static const struct processor *selected_cpu; +static const struct processor *selected_tune; + +#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) + +/* An ISA extension in the co-processor and main instruction set space. */ +struct aarch64_option_extension +{ + const char *const name; + const unsigned long flags_on; + const unsigned long flags_off; +}; + +/* ISA extensions in AArch64. */ +static const struct aarch64_option_extension all_extensions[] = +{ +#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \ + {NAME, FLAGS_ON, FLAGS_OFF}, +#include "aarch64-option-extensions.def" +#undef AARCH64_OPT_EXTENSION + {NULL, 0, 0} +}; + +/* Used to track the size of an address when generating a pre/post + increment address. */ +static enum machine_mode aarch64_memory_reference_mode; + +/* Used to force GTY into this file. */ +static GTY(()) int gty_dummy; + +/* A table of valid AArch64 "bitmask immediate" values for + logical instructions. 
*/ + +#define AARCH64_NUM_BITMASKS 5334 +static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS]; + +/* Did we set flag_omit_frame_pointer just so + aarch64_frame_pointer_required would be called? */ +static bool faked_omit_frame_pointer; + +typedef enum aarch64_cond_code +{ + AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL, + AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT, + AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV +} +aarch64_cc; + +#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1)) + +/* The condition codes of the processor, and the inverse function. */ +static const char * const aarch64_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* Provide a mapping from gcc register numbers to dwarf register numbers. */ +unsigned +aarch64_dbx_register_number (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return AARCH64_DWARF_R0 + regno - R0_REGNUM; + else if (regno == SP_REGNUM) + return AARCH64_DWARF_SP; + else if (FP_REGNUM_P (regno)) + return AARCH64_DWARF_V0 + regno - V0_REGNUM; + + /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no + equivalent DWARF register. */ + return DWARF_FRAME_REGISTERS; +} + +/* Return TRUE if MODE is any of the large INT modes. */ +static bool +aarch64_vect_struct_mode_p (enum machine_mode mode) +{ + return mode == OImode || mode == CImode || mode == XImode; +} + +/* Return TRUE if MODE is any of the vector modes. */ +static bool +aarch64_vector_mode_p (enum machine_mode mode) +{ + return aarch64_vector_mode_supported_p (mode) + || aarch64_vect_struct_mode_p (mode); +} + +/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ +static bool +aarch64_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_SIMD + && AARCH64_VALID_SIMD_QREG_MODE (mode) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; +} + +/* Implement HARD_REGNO_NREGS. */ + +int +aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode) +{ + switch (aarch64_regno_regclass (regno)) + { + case FP_REGS: + case FP_LO_REGS: + return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG; + default: + return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + } + gcc_unreachable (); +} + +/* Implement HARD_REGNO_MODE_OK. */ + +int +aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return regno == CC_REGNUM; + + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the + global register variable associated with the stack pointer + register via the syntax of asm ("wsp") in ILP32. */ + return mode == Pmode || mode == ptr_mode; + + if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) + return mode == Pmode; + + if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode)) + return 1; + + if (FP_REGNUM_P (regno)) + { + if (aarch64_vect_struct_mode_p (mode)) + return + (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM; + else + return 1; + } + + return 0; +} + +/* Return true if calls to DECL should be treated as + long-calls (ie called via a register). */ +static bool +aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) +{ + return false; +} + +/* Return true if calls to symbol-ref SYM should be treated as + long-calls (ie called via a register). 
*/ +bool +aarch64_is_long_call_p (rtx sym) +{ + return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); +} + +/* Return true if the offsets to a zero/sign-extract operation + represent an expression that matches an extend operation. The + operands represent the paramters from + + (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ +bool +aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, + rtx extract_imm) +{ + HOST_WIDE_INT mult_val, extract_val; + + if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) + return false; + + mult_val = INTVAL (mult_imm); + extract_val = INTVAL (extract_imm); + + if (extract_val > 8 + && extract_val < GET_MODE_BITSIZE (mode) + && exact_log2 (extract_val & ~7) > 0 + && (extract_val & 7) <= 4 + && mult_val == (1 << (extract_val & 7))) + return true; + + return false; +} + +/* Emit an insn that's a simple single-set. Both the operands must be + known to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. */ +rtx +aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + return cc_reg; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +rtx +aarch64_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Return the TLS model to use for ADDR. */ + +static enum tls_model +tls_symbolic_operand_type (rtx addr) +{ + enum tls_model tls_kind = TLS_MODEL_NONE; + rtx sym, addend; + + if (GET_CODE (addr) == CONST) + { + split_const (addr, &sym, &addend); + if (GET_CODE (sym) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (sym); + } + else if (GET_CODE (addr) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (addr); + + return tls_kind; +} + +/* We'll allow lo_sum's in addresses in our legitimate addresses + so that combine would take care of combining addresses where + necessary, but for generation purposes, we'll generate the address + as : + RTL Absolute + tmp = hi (symbol_ref); adrp x1, foo + dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo + nop + + PIC TLS + adrp x1, :got:foo adrp tmp, :tlsgd:foo + ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo + bl __tls_get_addr + nop + + Load TLS symbol, depending on TLS mechanism and TLS access model. + + Global Dynamic - Traditional TLS: + adrp tmp, :tlsgd:imm + add dest, tmp, #:tlsgd_lo12:imm + bl __tls_get_addr + + Global Dynamic - TLS Descriptors: + adrp dest, :tlsdesc:imm + ldr tmp, [dest, #:tlsdesc_lo12:imm] + add dest, dest, #:tlsdesc_lo12:imm + blr tmp + mrs tp, tpidr_el0 + add dest, dest, tp + + Initial Exec: + mrs tp, tpidr_el0 + adrp tmp, :gottprel:imm + ldr dest, [tmp, #:gottprel_lo12:imm] + add dest, dest, tp + + Local Exec: + mrs tp, tpidr_el0 + add t0, tp, #:tprel_hi12:imm + add t0, #:tprel_lo12_nc:imm +*/ + +static void +aarch64_load_symref_appropriately (rtx dest, rtx imm, + enum aarch64_symbol_type type) +{ + switch (type) + { + case SYMBOL_SMALL_ABSOLUTE: + { + /* In ILP32, the mode of dest can be either SImode or DImode. 
*/ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Pmode || mode == ptr_mode); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + emit_insn (gen_add_losym (dest, tmp_reg, imm)); + return; + } + + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + + case SYMBOL_SMALL_GOT: + { + /* In ILP32, the mode of dest can be either SImode or DImode, + while the got entry is always of SImode size. The mode of + dest depends on how dest is used: if dest is assigned to a + pointer (e.g. in the memory), it has SImode; it may have + DImode if dest is dereferenced to access the memeory. + This is why we have to handle three different ldr_got_small + patterns here (two patterns for ILP32). */ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + if (mode == ptr_mode) + { + if (mode == DImode) + emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm)); + else + emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm)); + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm)); + } + + return; + } + + case SYMBOL_SMALL_TLSGD: + { + rtx insns; + rtx result = gen_rtx_REG (Pmode, R0_REGNUM); + + start_sequence (); + emit_call_insn (gen_tlsgd_small (result, imm)); + insns = get_insns (); + end_sequence (); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, dest, result, imm); + return; + } + + case SYMBOL_SMALL_TLSDESC: + { + rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); + rtx tp; + + emit_insn (gen_tlsdesc_small (imm)); + tp = aarch64_load_tp (NULL); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_GOTTPREL: + { + rtx tmp_reg = gen_reg_rtx (Pmode); + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsie_small (tmp_reg, imm)); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_TPREL: + { + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsle_small (dest, tp, imm)); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_TINY_GOT: + emit_insn (gen_ldr_got_tiny (dest, imm)); + return; + + default: + gcc_unreachable (); + } +} + +/* Emit a move from SRC to DEST. Assume that the move expanders can + handle all moves if !can_create_pseudo_p (). The distinction is + important because, unlike emit_move_insn, the move expanders know + how to force Pmode objects into the constant pool even when the + constant pool address is not itself legitimate. */ +static rtx +aarch64_emit_move (rtx dest, rtx src) +{ + return (can_create_pseudo_p () + ? emit_move_insn (dest, src) + : emit_move_insn_1 (dest, src)); +} + +/* Split a 128-bit move operation into two 64-bit move operations, + taking care to handle partial overlap of register to register + copies. Special cases are needed when moving between GP regs and + FP regs. SRC can be a register, constant or memory; DST a register + or memory. If either operand is memory it must not have any side + effects. 
*/ +void +aarch64_split_128bit_move (rtx dst, rtx src) +{ + rtx dst_lo, dst_hi; + rtx src_lo, src_hi; + + enum machine_mode mode = GET_MODE (dst); + + gcc_assert (mode == TImode || mode == TFmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + + if (REG_P (dst) && REG_P (src)) + { + int src_regno = REGNO (src); + int dst_regno = REGNO (dst); + + /* Handle FP <-> GP regs. */ + if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno)) + { + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart (word_mode, src); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); + } + else + { + emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); + } + return; + } + else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) + { + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); + } + else + { + emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); + } + return; + } + } + + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart_mode (word_mode, mode, src); + + /* At most one pairing may overlap. */ + if (reg_overlap_mentioned_p (dst_lo, src_hi)) + { + aarch64_emit_move (dst_hi, src_hi); + aarch64_emit_move (dst_lo, src_lo); + } + else + { + aarch64_emit_move (dst_lo, src_lo); + aarch64_emit_move (dst_hi, src_hi); + } +} + +bool +aarch64_split_128bit_move_p (rtx dst, rtx src) +{ + return (! REG_P (src) + || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); +} + +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + +/* Split a complex SIMD move. 
*/ + +void +aarch64_split_simd_move (rtx dst, rtx src) +{ + enum machine_mode src_mode = GET_MODE (src); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src)) + { + rtx (*gen) (rtx, rtx); + + gcc_assert (VECTOR_MODE_P (src_mode)); + + switch (src_mode) + { + case V16QImode: + gen = gen_aarch64_split_simd_movv16qi; + break; + case V8HImode: + gen = gen_aarch64_split_simd_movv8hi; + break; + case V4SImode: + gen = gen_aarch64_split_simd_movv4si; + break; + case V2DImode: + gen = gen_aarch64_split_simd_movv2di; + break; + case V4SFmode: + gen = gen_aarch64_split_simd_movv4sf; + break; + case V2DFmode: + gen = gen_aarch64_split_simd_movv2df; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src)); + return; + } +} + +static rtx +aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value) +{ + if (can_create_pseudo_p ()) + return force_reg (mode, value); + else + { + x = aarch64_emit_move (x, value); + return x; + } +} + + +static rtx +aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) +{ + if (!aarch64_plus_immediate (GEN_INT (offset), mode)) + { + rtx high; + /* Load the full offset into a register. This + might be improvable in the future. */ + high = GEN_INT (offset); + offset = 0; + high = aarch64_force_temporary (mode, temp, high); + reg = aarch64_force_temporary (mode, temp, + gen_rtx_PLUS (mode, high, reg)); + } + return plus_constant (mode, reg, offset); +} + +void +aarch64_expand_mov_immediate (rtx dest, rtx imm) +{ + enum machine_mode mode = GET_MODE (dest); + unsigned HOST_WIDE_INT mask; + int i; + bool first; + unsigned HOST_WIDE_INT val; + bool subtargets; + rtx subtarget; + int one_match, zero_match; + + gcc_assert (mode == SImode || mode == DImode); + + /* Check on what type of symbol it is. */ + if (GET_CODE (imm) == SYMBOL_REF + || GET_CODE (imm) == LABEL_REF + || GET_CODE (imm) == CONST) + { + rtx mem, base, offset; + enum aarch64_symbol_type sty; + + /* If we have (const (plus symbol offset)), separate out the offset + before we start classifying the symbol. 
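+
+     For example (with an illustrative symbol name), an immediate such as
+
+       (const:DI (plus:DI (symbol_ref:DI ("arr")) (const_int 12)))
+
+     is split into the base "arr" and the offset 12, and only the base is
+     classified below.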
*/ + split_const (imm, &base, &offset); + + sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); + switch (sty) + { + case SYMBOL_FORCE_TO_MEM: + if (offset != const0_rtx + && targetm.cannot_force_const_mem (mode, imm)) + { + gcc_assert (can_create_pseudo_p ()); + base = aarch64_force_temporary (mode, dest, base); + base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); + aarch64_emit_move (dest, base); + return; + } + mem = force_const_mem (ptr_mode, imm); + gcc_assert (mem); + if (mode != ptr_mode) + mem = gen_rtx_ZERO_EXTEND (mode, mem); + emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); + return; + + case SYMBOL_SMALL_TLSGD: + case SYMBOL_SMALL_TLSDESC: + case SYMBOL_SMALL_GOTTPREL: + case SYMBOL_SMALL_GOT: + case SYMBOL_TINY_GOT: + if (offset != const0_rtx) + { + gcc_assert(can_create_pseudo_p ()); + base = aarch64_force_temporary (mode, dest, base); + base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); + aarch64_emit_move (dest, base); + return; + } + /* FALLTHRU */ + + case SYMBOL_SMALL_TPREL: + case SYMBOL_SMALL_ABSOLUTE: + case SYMBOL_TINY_ABSOLUTE: + aarch64_load_symref_appropriately (dest, imm, sty); + return; + + default: + gcc_unreachable (); + } + } + + if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); + return; + } + + if (!CONST_INT_P (imm)) + { + if (GET_CODE (imm) == HIGH) + emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); + else + { + rtx mem = force_const_mem (mode, imm); + gcc_assert (mem); + emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); + } + + return; + } + + if (mode == SImode) + { + /* We know we can't do this in 1 insn, and we must be able to do it + in two; so don't mess around looking for sequences that don't buy + us anything. */ + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff))); + emit_insn (gen_insv_immsi (dest, GEN_INT (16), + GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); + return; + } + + /* Remaining cases are all for DImode. */ + + val = INTVAL (imm); + subtargets = optimize && can_create_pseudo_p (); + + one_match = 0; + zero_match = 0; + mask = 0xffff; + + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) == 0) + zero_match++; + else if ((val & mask) == mask) + one_match++; + } + + if (one_match == 2) + { + mask = 0xffff; + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) != mask) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + return; + } + } + gcc_unreachable (); + } + + if (zero_match == 2) + goto simple_sequence; + + mask = 0x0ffff0000UL; + for (i = 16; i < 64; i += 16, mask <<= 16) + { + HOST_WIDE_INT comp = mask & ~(mask - 1); + + if (aarch64_uimm12_shift (val - (val & mask))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - (val & mask)))); + return; + } + else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT ((val + comp) & mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - ((val + comp) & mask)))); + return; + } + else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) + { + subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT ((val - comp) | ~mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - ((val - comp) | ~mask)))); + return; + } + else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (val | ~mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - (val | ~mask)))); + return; + } + } + + /* See if we can do it by arithmetically combining two + immediates. */ + for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + { + int j; + mask = 0xffff; + + if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) + || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - aarch64_bitmasks[i]))); + return; + } + + for (j = 0; j < 64; j += 16, mask <<= 16) + { + if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_insv_immdi (dest, GEN_INT (j), + GEN_INT ((val >> j) & 0xffff))); + return; + } + } + } + + /* See if we can do it by logically combining two immediates. */ + for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + { + if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i]) + { + int j; + + for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) + { + subtarget = subtargets ? gen_reg_rtx (mode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_iordi3 (dest, subtarget, + GEN_INT (aarch64_bitmasks[j]))); + return; + } + } + else if ((val & aarch64_bitmasks[i]) == val) + { + int j; + + for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) + { + + subtarget = subtargets ? gen_reg_rtx (mode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[j]))); + emit_insn (gen_anddi3 (dest, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + return; + } + } + } + + simple_sequence: + first = true; + mask = 0xffff; + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) != 0) + { + if (first) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, + GEN_INT (val & mask))); + first = false; + } + else + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + } + } +} + +static bool +aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Indirect calls are not currently supported. */ + if (decl == NULL) + return false; + + /* Cannot tail-call to long-calls, since these are outside of the + range of a branch instruction (we could handle this if we added + support for indirect tail-calls. */ + if (aarch64_decl_is_long_call_p (decl)) + return false; + + return true; +} + +/* Implement TARGET_PASS_BY_REFERENCE. */ + +static bool +aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + enum machine_mode dummymode; + int nregs; + + /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ + size = (mode == BLKmode && type) + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + + /* Aggregates are passed by reference based on their size. 
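+     For instance, assuming LP64 (UNITS_PER_WORD == 8): a plain structure
+     of three 64-bit integers (24 bytes) exceeds 2 * UNITS_PER_WORD and is
+     passed by reference, whereas a 16-byte structure is still passed by
+     value in a pair of registers.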
*/ + if (type && AGGREGATE_TYPE_P (type)) + { + size = int_size_in_bytes (type); + } + + /* Variable sized arguments are always returned by reference. */ + if (size < 0) + return true; + + /* Can this be a candidate to be passed in fp/simd register(s)? */ + if (aarch64_vfp_is_call_or_return_candidate (mode, type, + &dummymode, &nregs, + NULL)) + return false; + + /* Arguments which are variable sized or larger than 2 registers are + passed by reference unless they are a homogenous floating point + aggregate. */ + return size > 2 * UNITS_PER_WORD; +} + +/* Return TRUE if VALTYPE is padded to its least significant bits. */ +static bool +aarch64_return_in_msb (const_tree valtype) +{ + enum machine_mode dummy_mode; + int dummy_int; + + /* Never happens in little-endian mode. */ + if (!BYTES_BIG_ENDIAN) + return false; + + /* Only composite types smaller than or equal to 16 bytes can + be potentially returned in registers. */ + if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype)) + || int_size_in_bytes (valtype) <= 0 + || int_size_in_bytes (valtype) > 16) + return false; + + /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate) + or an HVA (Homogeneous Short-Vector Aggregate); such a special composite + is always passed/returned in the least significant bits of fp/simd + register(s). */ + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype, + &dummy_mode, &dummy_int, NULL)) + return false; + + return true; +} + +/* Implement TARGET_FUNCTION_VALUE. + Define how to find the value returned by a function. */ + +static rtx +aarch64_function_value (const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp; + int count; + enum machine_mode ag_mode; + + mode = TYPE_MODE (type); + if (INTEGRAL_TYPE_P (type)) + mode = promote_function_mode (type, mode, &unsignedp, func, 1); + + if (aarch64_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + if (aarch64_vfp_is_call_or_return_candidate (mode, type, + &ag_mode, &count, NULL)) + { + if (!aarch64_composite_type_p (type, mode)) + { + gcc_assert (count == 1 && mode == ag_mode); + return gen_rtx_REG (mode, V0_REGNUM); + } + else + { + int i; + rtx par; + + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + return par; + } + } + else + return gen_rtx_REG (mode, R0_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. + Return true if REGNO is the number of a hard register in which the values + of called function may come back. */ + +static bool +aarch64_function_value_regno_p (const unsigned int regno) +{ + /* Maximum of 16 bytes can be returned in the general registers. Examples + of 16-byte return values are: 128-bit integers and 16-byte small + structures (excluding homogeneous floating-point aggregates). */ + if (regno == R0_REGNUM || regno == R1_REGNUM) + return true; + + /* Up to four fp/simd registers can return a function value, e.g. a + homogeneous floating-point aggregate having four members. */ + if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS) + return !TARGET_GENERAL_REGS_ONLY; + + return false; +} + +/* Implement TARGET_RETURN_IN_MEMORY. 
+ + If the type T of the result of a function is such that + void func (T arg) + would require that arg be passed as a value in a register (or set of + registers) according to the parameter passing rules, then the result + is returned in the same registers as would be used for such an + argument. */ + +static bool +aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + enum machine_mode ag_mode; + int count; + + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != COMPLEX_TYPE + && TREE_CODE (type) != VECTOR_TYPE) + /* Simple scalar types always returned in registers. */ + return false; + + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), + type, + &ag_mode, + &count, + NULL)) + return false; + + /* Types larger than 2 registers returned in memory. */ + size = int_size_in_bytes (type); + return (size < 0 || size > 2 * UNITS_PER_WORD); +} + +static bool +aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, int *nregs) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + return aarch64_vfp_is_call_or_return_candidate (mode, + type, + &pcum->aapcs_vfp_rmode, + nregs, + NULL); +} + +/* Given MODE and TYPE of a function argument, return the alignment in + bits. The idea is to suppress any stronger alignment requested by + the user and opt for the natural alignment (specified in AAPCS64 \S 4.1). + This is a helper function for local use only. */ + +static unsigned int +aarch64_function_arg_alignment (enum machine_mode mode, const_tree type) +{ + unsigned int alignment; + + if (type) + { + if (!integer_zerop (TYPE_SIZE (type))) + { + if (TYPE_MODE (type) == mode) + alignment = TYPE_ALIGN (type); + else + alignment = GET_MODE_ALIGNMENT (mode); + } + else + alignment = 0; + } + else + alignment = GET_MODE_ALIGNMENT (mode); + + return alignment; +} + +/* Layout a function argument according to the AAPCS64 rules. The rule + numbers refer to the rule numbers in the AAPCS64. */ + +static void +aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int ncrn, nvrn, nregs; + bool allocate_ncrn, allocate_nvrn; + + /* We need to do this once per argument. */ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); + allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, + mode, + type, + &nregs); + + /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable. + The following code thus handles passing by SIMD/FP registers first. */ + + nvrn = pcum->aapcs_nvrn; + + /* C1 - C5 for floating point, homogenous floating point aggregates (HFA) + and homogenous short-vector aggregates (HVA). */ + if (allocate_nvrn) + { + if (nvrn + nregs <= NUM_FP_ARG_REGS) + { + pcum->aapcs_nextnvrn = nvrn + nregs; + if (!aarch64_composite_type_p (type, mode)) + { + gcc_assert (nregs == 1); + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); + } + else + { + rtx par; + int i; + par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); + for (i = 0; i < nregs; i++) + { + rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode, + V0_REGNUM + nvrn + i); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode))); + XVECEXP (par, 0, i) = tmp; + } + pcum->aapcs_reg = par; + } + return; + } + else + { + /* C.3 NSRN is set to 8. 
*/ + pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS; + goto on_stack; + } + } + + ncrn = pcum->aapcs_ncrn; + nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) + + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + + /* C6 - C9. though the sign and zero extension semantics are + handled elsewhere. This is the case where the argument fits + entirely general registers. */ + if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS)) + { + unsigned int alignment = aarch64_function_arg_alignment (mode, type); + + gcc_assert (nregs == 0 || nregs == 1 || nregs == 2); + + /* C.8 if the argument has an alignment of 16 then the NGRN is + rounded up to the next even number. */ + if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2) + { + ++ncrn; + gcc_assert (ncrn + nregs <= NUM_ARG_REGS); + } + /* NREGS can be 0 when e.g. an empty structure is to be passed. + A reg is still generated for it, but the caller should be smart + enough not to use it. */ + if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT) + { + pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn); + } + else + { + rtx par; + int i; + + par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); + for (i = 0; i < nregs; i++) + { + rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * UNITS_PER_WORD)); + XVECEXP (par, 0, i) = tmp; + } + pcum->aapcs_reg = par; + } + + pcum->aapcs_nextncrn = ncrn + nregs; + return; + } + + /* C.11 */ + pcum->aapcs_nextncrn = NUM_ARG_REGS; + + /* The argument is passed on stack; record the needed number of words for + this argument (we can re-use NREGS) and align the total size if + necessary. */ +on_stack: + pcum->aapcs_stack_words = nregs; + if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) + pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, + 16 / UNITS_PER_WORD) + 1; + return; +} + +/* Implement TARGET_FUNCTION_ARG. 
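+
+   As a small illustration (an invented call): for f (1, 2, 3) with three
+   int arguments, this hook reports the third argument as the hard
+   register w2, while NULL_RTX is returned for an argument that the
+   layout code above assigned to the stack.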
*/ + +static rtx +aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); + + if (mode == VOIDmode) + return NULL_RTX; + + aarch64_layout_arg (pcum_v, mode, type, named); + return pcum->aapcs_reg; +} + +void +aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libname ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED, + unsigned n_named ATTRIBUTE_UNUSED) +{ + pcum->aapcs_ncrn = 0; + pcum->aapcs_nvrn = 0; + pcum->aapcs_nextncrn = 0; + pcum->aapcs_nextnvrn = 0; + pcum->pcs_variant = ARM_PCS_AAPCS64; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_arg_processed = false; + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + + return; +} + +static void +aarch64_function_arg_advance (cumulative_args_t pcum_v, + enum machine_mode mode, + const_tree type, + bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + if (pcum->pcs_variant == ARM_PCS_AAPCS64) + { + aarch64_layout_arg (pcum_v, mode, type, named); + gcc_assert ((pcum->aapcs_reg != NULL_RTX) + != (pcum->aapcs_stack_words != 0)); + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_nextncrn; + pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; + pcum->aapcs_stack_size += pcum->aapcs_stack_words; + pcum->aapcs_stack_words = 0; + pcum->aapcs_reg = NULL_RTX; + } +} + +bool +aarch64_function_arg_regno_p (unsigned regno) +{ + return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS)); +} + +/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least + PARM_BOUNDARY bits of alignment, but will be given anything up + to STACK_BOUNDARY bits if the type requires it. This makes sure + that both before and after the layout of each argument, the Next + Stacked Argument Address (NSAA) will have a minimum alignment of + 8 bytes. */ + +static unsigned int +aarch64_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int alignment = aarch64_function_arg_alignment (mode, type); + + if (alignment < PARM_BOUNDARY) + alignment = PARM_BOUNDARY; + if (alignment > STACK_BOUNDARY) + alignment = STACK_BOUNDARY; + return alignment; +} + +/* For use by FUNCTION_ARG_PADDING (MODE, TYPE). + + Return true if an argument passed on the stack should be padded upwards, + i.e. if the least-significant byte of the stack slot has useful data. + + Small aggregate types are placed in the lowest memory address. + + The related parameter passing rules are B.4, C.3, C.5 and C.14. */ + +bool +aarch64_pad_arg_upward (enum machine_mode mode, const_tree type) +{ + /* On little-endian targets, the least significant byte of every stack + argument is passed at the lowest byte address of the stack slot. */ + if (!BYTES_BIG_ENDIAN) + return true; + + /* Otherwise, integral, floating-point and pointer types are padded downward: + the least significant byte of a stack argument is passed at the highest + byte address of the stack slot. */ + if (type + ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type) + || POINTER_TYPE_P (type)) + : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode))) + return false; + + /* Everything else padded upward, i.e. data in first byte of stack slot. */ + return true; +} + +/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST). 
+ + It specifies padding for the last (may also be the only) + element of a block move between registers and memory. If + assuming the block is in the memory, padding upward means that + the last element is padded after its highest significant byte, + while in downward padding, the last element is padded at the + its least significant byte side. + + Small aggregates and small complex types are always padded + upwards. + + We don't need to worry about homogeneous floating-point or + short-vector aggregates; their move is not affected by the + padding direction determined here. Regardless of endianness, + each element of such an aggregate is put in the least + significant bits of a fp/simd register. + + Return !BYTES_BIG_ENDIAN if the least significant byte of the + register has useful data, and return the opposite if the most + significant byte does. */ + +bool +aarch64_pad_reg_upward (enum machine_mode mode, const_tree type, + bool first ATTRIBUTE_UNUSED) +{ + + /* Small composite types are always padded upward. */ + if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode)) + { + HOST_WIDE_INT size = (type ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)); + if (size < 2 * UNITS_PER_WORD) + return true; + } + + /* Otherwise, use the default padding. */ + return !BYTES_BIG_ENDIAN; +} + +static enum machine_mode +aarch64_libgcc_cmp_return_mode (void) +{ + return SImode; +} + +static bool +aarch64_frame_pointer_required (void) +{ + /* If the function contains dynamic stack allocations, we need to + use the frame pointer to access the static parts of the frame. */ + if (cfun->calls_alloca) + return true; + + /* We may have turned flag_omit_frame_pointer on in order to have this + function called; if we did, we also set the 'faked_omit_frame_pointer' flag + and we'll check it here. + If we really did set flag_omit_frame_pointer normally, then we return false + (no frame pointer required) in all cases. */ + + if (flag_omit_frame_pointer && !faked_omit_frame_pointer) + return false; + else if (flag_omit_leaf_frame_pointer) + return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM); + return true; +} + +/* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). */ +static void +aarch64_layout_frame (void) +{ + HOST_WIDE_INT offset = 0; + int regno; + + if (reload_completed && cfun->machine->frame.laid_out) + return; + + cfun->machine->frame.fp_lr_offset = 0; + + /* First mark all the registers that really need to be saved... */ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + cfun->machine->frame.reg_offset[regno] = -1; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + cfun->machine->frame.reg_offset[regno] = -1; + + /* ... that includes the eh data registers (if needed)... */ + if (crtl->calls_eh_return) + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) + cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0; + + /* ... and any callee saved register that dataflow says is live. 
*/ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + cfun->machine->frame.reg_offset[regno] = 0; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + cfun->machine->frame.reg_offset[regno] = 0; + + if (frame_pointer_needed) + { + cfun->machine->frame.reg_offset[R30_REGNUM] = 0; + cfun->machine->frame.reg_offset[R29_REGNUM] = 0; + cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; + } + + /* Now assign stack slots for them. */ + for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++) + if (cfun->machine->frame.reg_offset[regno] != -1) + { + cfun->machine->frame.reg_offset[regno] = offset; + offset += UNITS_PER_WORD; + } + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (cfun->machine->frame.reg_offset[regno] != -1) + { + cfun->machine->frame.reg_offset[regno] = offset; + offset += UNITS_PER_WORD; + } + + if (frame_pointer_needed) + { + cfun->machine->frame.reg_offset[R29_REGNUM] = offset; + offset += UNITS_PER_WORD; + cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; + } + + if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) + { + cfun->machine->frame.reg_offset[R30_REGNUM] = offset; + offset += UNITS_PER_WORD; + cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; + } + + cfun->machine->frame.padding0 = + (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); + offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); + + cfun->machine->frame.saved_regs_size = offset; + cfun->machine->frame.laid_out = true; +} + +/* Make the last instruction frame-related and note that it performs + the operation described by FRAME_PATTERN. */ + +static void +aarch64_set_frame_expr (rtx frame_pattern) +{ + rtx insn; + + insn = get_last_insn (); + RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (frame_pattern) = 1; + REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, + frame_pattern, + REG_NOTES (insn)); +} + +static bool +aarch64_register_saved_on_entry (int regno) +{ + return cfun->machine->frame.reg_offset[regno] != -1; +} + + +static void +aarch64_save_or_restore_fprs (int start_offset, int increment, + bool restore, rtx base_rtx) + +{ + unsigned regno; + unsigned regno2; + rtx insn; + rtx (*gen_mem_ref)(enum machine_mode, rtx) + = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { + rtx mem; + mem = gen_mem_ref (DFmode, + plus_constant (Pmode, + base_rtx, + start_offset)); + + for (regno2 = regno + 1; + regno2 <= V31_REGNUM + && !aarch64_register_saved_on_entry (regno2); + regno2++) + { + /* Empty loop. */ + } + if (regno2 <= V31_REGNUM && + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; + /* Next highest register to be saved. 
*/ + mem2 = gen_mem_ref (DFmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); + if (restore == false) + { + insn = emit_insn + ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), + mem2, gen_rtx_REG (DFmode, regno2))); + + } + else + { + insn = emit_insn + ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, + gen_rtx_REG (DFmode, regno2), mem2)); + + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno2)); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) + insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); + else + { + insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DImode, regno)); + } + start_offset += increment; + } + RTX_FRAME_RELATED_P (insn) = 1; + } + } + +} + + +/* offset from the stack pointer of where the saves and + restore's have to happen. */ +static void +aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, + bool restore) +{ + rtx insn; + rtx base_rtx = stack_pointer_rtx; + HOST_WIDE_INT start_offset = offset; + HOST_WIDE_INT increment = UNITS_PER_WORD; + rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; + unsigned regno; + unsigned regno2; + + for (regno = R0_REGNUM; regno <= limit; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { + rtx mem; + mem = gen_mem_ref (Pmode, + plus_constant (Pmode, + base_rtx, + start_offset)); + + for (regno2 = regno + 1; + regno2 <= limit + && !aarch64_register_saved_on_entry (regno2); + regno2++) + { + /* Empty loop. */ + } + if (regno2 <= limit && + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (Pmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); + if (restore == false) + { + insn = emit_insn + ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), + mem2, gen_rtx_REG (DImode, regno2))); + + } + else + { + insn = emit_insn + ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, + gen_rtx_REG (DImode, regno2), mem2)); + + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, + 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) + insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); + else + { + insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); + } + start_offset += increment; + } + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); + +} + +/* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ <-- arg_pointer_rtx + | | + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ <-- frame_pointer_rtx + | | + | local variables | + | | + +-------------------------------+ + | padding0 | \ + +-------------------------------+ | + | | | + | | | + | callee-saved registers | | frame.saved_regs_size + | | | + +-------------------------------+ | + | LR' | | + +-------------------------------+ | + | FP' | / + P +-------------------------------+ <-- hard_frame_pointer_rtx + | dynamic allocation | + +-------------------------------+ + | | + | outgoing stack arguments | + | | + +-------------------------------+ <-- stack_pointer_rtx + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and + hard_frame_pointer_rtx unchanged. */ + +/* Generate the prologue instructions for entry into a function. + Establish the stack frame by decreasing the stack pointer with a + properly calculated size and, if necessary, create a frame record + filled with the values of LR and previous frame pointer. The + current FP is also set up if it is in use. */ + +void +aarch64_expand_prologue (void) +{ + /* sub sp, sp, # + stp {fp, lr}, [sp, # - 16] + add fp, sp, # - hardfp_offset + stp {cs_reg}, [fp, #-16] etc. + + sub sp, sp, + */ + HOST_WIDE_INT original_frame_size; /* local variables + vararg save */ + HOST_WIDE_INT frame_size, offset; + HOST_WIDE_INT fp_offset; /* FP offset from SP */ + rtx insn; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg) + && (cfun->stdarg || !cfun->machine->saved_varargs_size)); + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + + fp_offset = (offset + - original_frame_size + - cfun->machine->frame.saved_regs_size); + + /* Store pairs and load pairs have a range only -512 to 504. */ + if (offset >= 512) + { + /* When the frame has a large size, an initial decrease is done on + the stack pointer to jump over the callee-allocated save area for + register varargs, the local variable area and/or the callee-saved + register area. This will allow the pre-index write-back + store pair instructions to be used for setting up the stack frame + efficiently. 
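+
+     As a rough example (exact values depend on the layout computed
+     above, and a frame pointer is assumed to be set up): with 1024 bytes
+     of locals, no outgoing arguments and only FP/LR to save, the
+     expected shape is
+
+       sub  sp, sp, #1024
+       stp  x29, x30, [sp, #-16]!
+       add  x29, sp, #0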
*/ + offset = original_frame_size + cfun->machine->frame.saved_regs_size; + if (offset >= 512) + offset = cfun->machine->frame.saved_regs_size; + + frame_size -= (offset + crtl->outgoing_args_size); + fp_offset = 0; + + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); + emit_move_insn (op0, GEN_INT (-frame_size)); + emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -frame_size))); + } + else if (frame_size > 0) + { + if ((frame_size & 0xfff) != frame_size) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (-(frame_size + & ~(HOST_WIDE_INT)0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if ((frame_size & 0xfff) != 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (-(frame_size + & (HOST_WIDE_INT)0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + else + frame_size = -1; + + if (offset > 0) + { + /* Save the frame pointer and lr if the frame pointer is needed + first. Make the frame pointer point to the location of the + old frame pointer on the stack. */ + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; + + if (fp_offset) + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (insn) = 1; + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + gen_rtx_MINUS (Pmode, + stack_pointer_rtx, + GEN_INT (offset)))); + mem_fp = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset)); + mem_lr = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); + insn = emit_insn (gen_store_pairdi (mem_fp, + hard_frame_pointer_rtx, + mem_lr, + gen_rtx_REG (DImode, + LR_REGNUM))); + } + else + { + insn = emit_insn (gen_storewb_pairdi_di + (stack_pointer_rtx, stack_pointer_rtx, + hard_frame_pointer_rtx, + gen_rtx_REG (DImode, LR_REGNUM), + GEN_INT (-offset), + GEN_INT (GET_MODE_SIZE (DImode) - offset))); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + } + + /* The first part of a frame-related parallel insn is always + assumed to be relevant to the frame calculations; + subsequent parts, are only frame-related if explicitly + marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; + + /* Set up frame pointer to point to the location of the + previous frame pointer on the stack. */ + insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (fp_offset))); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, hard_frame_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_stack_tie (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + aarch64_save_or_restore_callee_save_registers + (fp_offset + cfun->machine->frame.hardfp_offset, 0); + } + + /* when offset >= 512, + sub sp, sp, # */ + if (frame_size > -1) + { + if (crtl->outgoing_args_size > 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (- crtl->outgoing_args_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + +/* Generate the epilogue instructions for returning from a function. 
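+
+   For the small frame sketched in the prologue comment above, the
+   matching epilogue would look roughly like
+
+     ldp  x29, x30, [sp], #16
+     add  sp, sp, #1024
+     ret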
*/ +void +aarch64_expand_epilogue (bool for_sibcall) +{ + HOST_WIDE_INT original_frame_size, frame_size, offset; + HOST_WIDE_INT fp_offset; + rtx insn; + rtx cfa_reg; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + fp_offset = (offset + - original_frame_size + - cfun->machine->frame.saved_regs_size); + + cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; + + /* Store pairs and load pairs have a range only -512 to 504. */ + if (offset >= 512) + { + offset = original_frame_size + cfun->machine->frame.saved_regs_size; + if (offset >= 512) + offset = cfun->machine->frame.saved_regs_size; + + frame_size -= (offset + crtl->outgoing_args_size); + fp_offset = 0; + if (!frame_pointer_needed && crtl->outgoing_args_size > 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (crtl->outgoing_args_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + frame_size = -1; + + /* If there were outgoing arguments or we've done dynamic stack + allocation, then restore the stack pointer from the frame + pointer. This is at most one insn and more efficient than using + GCC's internal mechanism. */ + if (frame_pointer_needed + && (crtl->outgoing_args_size || cfun->calls_alloca)) + { + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + /* As SP is set to (FP - fp_offset), according to the rules in + dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated + from the value of SP from now on. */ + cfa_reg = stack_pointer_rtx; + } + + aarch64_save_or_restore_callee_save_registers + (fp_offset + cfun->machine->frame.hardfp_offset, 1); + + /* Restore the frame pointer and lr if the frame pointer is needed. */ + if (offset > 0) + { + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; + + if (fp_offset) + { + mem_fp = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset)); + mem_lr = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); + insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, + mem_fp, + gen_rtx_REG (DImode, + LR_REGNUM), + mem_lr)); + } + else + { + insn = emit_insn (gen_loadwb_pairdi_di + (stack_pointer_rtx, + stack_pointer_rtx, + hard_frame_pointer_rtx, + gen_rtx_REG (DImode, LR_REGNUM), + GEN_INT (offset), + GEN_INT (GET_MODE_SIZE (DImode) + offset))); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, cfa_reg, + offset)))); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DImode, LR_REGNUM)); + + if (fp_offset) + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* Stack adjustment for exception handler. */ + if (crtl->calls_eh_return) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. However, at this point the CFA is + based on SP. Ideally we would update the SP and define the + CFA along the lines of: + + SP = SP + EH_RETURN_STACKADJ_RTX + (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) + + However the dwarf emitter only understands a constant + register offset. + + The solution chosen here is to use the otherwise unused IP0 + as a temporary register to hold the current SP value. The + CFA is described using IP0 then SP is modified. */ + + rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM); + + insn = emit_move_insn (ip0, stack_pointer_rtx); + add_reg_note (insn, REG_CFA_DEF_CFA, ip0); + RTX_FRAME_RELATED_P (insn) = 1; + + emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); + + /* Ensure the assignment to IP0 does not get optimized away. */ + emit_use (ip0); + } + + if (frame_size > -1) + { + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); + emit_move_insn (op0, GEN_INT (frame_size)); + emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + frame_size))); + } + else if (frame_size > 0) + { + if ((frame_size & 0xfff) != 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT ((frame_size + & (HOST_WIDE_INT) 0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if ((frame_size & 0xfff) != frame_size) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT ((frame_size + & ~ (HOST_WIDE_INT) 0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset))); + } + + emit_use (gen_rtx_REG (DImode, LR_REGNUM)); + if (!for_sibcall) + emit_jump_insn (ret_rtx); +} + +/* Return the place to copy the exception unwinding return address to. + This will probably be a stack slot, but could (in theory be the + return register). */ +rtx +aarch64_final_eh_return_addr (void) +{ + HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + fp_offset = offset + - original_frame_size + - cfun->machine->frame.saved_regs_size; + + if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) + return gen_rtx_REG (DImode, LR_REGNUM); + + /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can + result in a store to save LR introduced by builtin_eh_return () being + incorrectly deleted because the alias is not detected. + So in the calculation of the address to copy the exception unwinding + return address to, we note 2 cases. 
+ If FP is needed and the fp_offset is 0, it means that SP = FP and hence + we return a SP-relative location since all the addresses are SP-relative + in this case. This prevents the store from being optimized away. + If the fp_offset is not 0, then the addresses will be FP-relative and + therefore we return a FP-relative location. */ + + if (frame_pointer_needed) + { + if (fp_offset) + return gen_frame_mem (DImode, + plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD)); + else + return gen_frame_mem (DImode, + plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD)); + } + + /* If FP is not needed, we calculate the location of LR, which would be + at the top of the saved registers block. */ + + return gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + cfun->machine->frame.saved_regs_size + - 2 * UNITS_PER_WORD)); +} + +/* Output code to build up a constant in a register. */ +static void +aarch64_build_constant (int regnum, HOST_WIDE_INT val) +{ + if (aarch64_bitmask_imm (val, DImode)) + emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); + else + { + int i; + int ncount = 0; + int zcount = 0; + HOST_WIDE_INT valp = val >> 16; + HOST_WIDE_INT valm; + HOST_WIDE_INT tval; + + for (i = 16; i < 64; i += 16) + { + valm = (valp & 0xffff); + + if (valm != 0) + ++ zcount; + + if (valm != 0xffff) + ++ ncount; + + valp >>= 16; + } + + /* zcount contains the number of additional MOVK instructions + required if the constant is built up with an initial MOVZ instruction, + while ncount is the number of MOVK instructions required if starting + with a MOVN instruction. Choose the sequence that yields the fewest + number of instructions, preferring MOVZ instructions when they are both + the same. */ + if (ncount < zcount) + { + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); + tval = 0xffff; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val & 0xffff)); + tval = 0; + } + + val >>= 16; + + for (i = 16; i < 64; i += 16) + { + if ((val & 0xffff) != tval) + emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), + GEN_INT (i), GEN_INT (val & 0xffff))); + val >>= 16; + } + } +} + +static void +aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta) +{ + HOST_WIDE_INT mdelta = delta; + rtx this_rtx = gen_rtx_REG (Pmode, regnum); + rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); + + if (mdelta < 0) + mdelta = -mdelta; + + if (mdelta >= 4096 * 4096) + { + aarch64_build_constant (scratchreg, delta); + emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); + } + else if (mdelta > 0) + { + if (mdelta >= 4096) + { + emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096))); + rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12)); + if (delta < 0) + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_MINUS (Pmode, this_rtx, shift))); + else + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_PLUS (Pmode, this_rtx, shift))); + } + if (mdelta % 4096 != 0) + { + scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096)); + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx))); + } + } +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + /* The this pointer is always in x0. 
Note that this differs from + Arm where the this pointer maybe bumped to r1 if r0 is required + to return a pointer to an aggregate. On AArch64 a result value + pointer will be in x8. */ + int this_regno = R0_REGNUM; + rtx this_rtx, temp0, temp1, addr, insn, funexp; + + reload_completed = 1; + emit_note (NOTE_INSN_PROLOGUE_END); + + if (vcall_offset == 0) + aarch64_add_constant (this_regno, IP1_REGNUM, delta); + else + { + gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); + + this_rtx = gen_rtx_REG (Pmode, this_regno); + temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); + temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); + + addr = this_rtx; + if (delta != 0) + { + if (delta >= -256 && delta < 256) + addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, + plus_constant (Pmode, this_rtx, delta)); + else + aarch64_add_constant (this_regno, IP1_REGNUM, delta); + } + + if (Pmode == ptr_mode) + aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr)); + else + aarch64_emit_move (temp0, + gen_rtx_ZERO_EXTEND (Pmode, + gen_rtx_MEM (ptr_mode, addr))); + + if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES) + addr = plus_constant (Pmode, temp0, vcall_offset); + else + { + aarch64_build_constant (IP1_REGNUM, vcall_offset); + addr = gen_rtx_PLUS (Pmode, temp0, temp1); + } + + if (Pmode == ptr_mode) + aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr)); + else + aarch64_emit_move (temp1, + gen_rtx_SIGN_EXTEND (Pmode, + gen_rtx_MEM (ptr_mode, addr))); + + emit_insn (gen_add2_insn (this_rtx, temp1)); + } + + /* Generate a tail call to the target function. */ + if (!TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); + SIBLING_CALL_P (insn) = 1; + + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + + /* Stop pretending to be a post-reload pass. */ + reload_completed = 0; +} + +static int +aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. 
*/ + if (GET_CODE (*x) == UNSPEC + && XINT (*x, 1) == UNSPEC_TLS) + return -1; + + return 0; +} + +static bool +aarch64_tls_referenced_p (rtx x) +{ + if (!TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL); +} + + +static int +aarch64_bitmasks_cmp (const void *i1, const void *i2) +{ + const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1; + const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2; + + if (*imm1 < *imm2) + return -1; + if (*imm1 > *imm2) + return +1; + return 0; +} + + +static void +aarch64_build_bitmask_table (void) +{ + unsigned HOST_WIDE_INT mask, imm; + unsigned int log_e, e, s, r; + unsigned int nimms = 0; + + for (log_e = 1; log_e <= 6; log_e++) + { + e = 1 << log_e; + if (e == 64) + mask = ~(HOST_WIDE_INT) 0; + else + mask = ((HOST_WIDE_INT) 1 << e) - 1; + for (s = 1; s < e; s++) + { + for (r = 0; r < e; r++) + { + /* set s consecutive bits to 1 (s < 64) */ + imm = ((unsigned HOST_WIDE_INT)1 << s) - 1; + /* rotate right by r */ + if (r != 0) + imm = ((imm >> r) | (imm << (e - r))) & mask; + /* replicate the constant depending on SIMD size */ + switch (log_e) { + case 1: imm |= (imm << 2); + case 2: imm |= (imm << 4); + case 3: imm |= (imm << 8); + case 4: imm |= (imm << 16); + case 5: imm |= (imm << 32); + case 6: + break; + default: + gcc_unreachable (); + } + gcc_assert (nimms < AARCH64_NUM_BITMASKS); + aarch64_bitmasks[nimms++] = imm; + } + } + } + + gcc_assert (nimms == AARCH64_NUM_BITMASKS); + qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]), + aarch64_bitmasks_cmp); +} + + +/* Return true if val can be encoded as a 12-bit unsigned immediate with + a left shift of 0 or 12 bits. */ +bool +aarch64_uimm12_shift (HOST_WIDE_INT val) +{ + return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val + ); +} + + +/* Return true if val is an immediate that can be loaded into a + register by a MOVZ instruction. */ +static bool +aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) > 4) + { + if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) + return 1; + } + else + { + /* Ignore sign extension. */ + val &= (HOST_WIDE_INT) 0xffffffff; + } + return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); +} + + +/* Return true if val is a valid bitmask immediate. */ +bool +aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) < 8) + { + /* Replicate bit pattern. */ + val &= (HOST_WIDE_INT) 0xffffffff; + val |= val << 32; + } + return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS, + sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL; +} + + +/* Return true if val is an immediate that can be loaded into a + register in a single instruction. 
*/ +bool +aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode)) + return 1; + return aarch64_bitmask_imm (val, mode); +} + +static bool +aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx base, offset; + + if (GET_CODE (x) == HIGH) + return true; + + split_const (x, &base, &offset); + if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) + { + if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM) + return true; + else + /* Avoid generating a 64-bit relocation in ILP32; leave + to aarch64_expand_mov_immediate to handle it properly. */ + return mode != ptr_mode; + } + + return aarch64_tls_referenced_p (x); +} + +/* Return true if register REGNO is a valid index register. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +bool +aarch64_regno_ok_for_index_p (int regno, bool strict_p) +{ + if (!HARD_REGISTER_NUM_P (regno)) + { + if (!strict_p) + return true; + + if (!reg_renumber) + return false; + + regno = reg_renumber[regno]; + } + return GP_REGNUM_P (regno); +} + +/* Return true if register REGNO is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +bool +aarch64_regno_ok_for_base_p (int regno, bool strict_p) +{ + if (!HARD_REGISTER_NUM_P (regno)) + { + if (!strict_p) + return true; + + if (!reg_renumber) + return false; + + regno = reg_renumber[regno]; + } + + /* The fake registers will be eliminated to either the stack or + hard frame pointer, both of which are usually valid base registers. + Reload deals with the cases where the eliminated form isn't valid. */ + return (GP_REGNUM_P (regno) + || regno == SP_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +} + +/* Return true if X is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +static bool +aarch64_base_register_rtx_p (rtx x, bool strict_p) +{ + if (!strict_p && GET_CODE (x) == SUBREG) + x = SUBREG_REG (x); + + return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p)); +} + +/* Return true if address offset is a valid index. If it is, fill in INFO + appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ + +static bool +aarch64_classify_index (struct aarch64_address_info *info, rtx x, + enum machine_mode mode, bool strict_p) +{ + enum aarch64_address_type type; + rtx index; + int shift; + + /* (reg:P) */ + if ((REG_P (x) || GET_CODE (x) == SUBREG) + && GET_MODE (x) == Pmode) + { + type = ADDRESS_REG_REG; + index = x; + shift = 0; + } + /* (sign_extend:DI (reg:SI)) */ + else if ((GET_CODE (x) == SIGN_EXTEND + || GET_CODE (x) == ZERO_EXTEND) + && GET_MODE (x) == DImode + && GET_MODE (XEXP (x, 0)) == SImode) + { + type = (GET_CODE (x) == SIGN_EXTEND) + ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; + index = XEXP (x, 0); + shift = 0; + } + /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */ + else if (GET_CODE (x) == MULT + && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + && GET_MODE (XEXP (x, 0)) == DImode + && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode + && CONST_INT_P (XEXP (x, 1))) + { + type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (x, 1)));
+    }
+  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
+  else if (GET_CODE (x) == ASHIFT
+           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
+           && GET_MODE (XEXP (x, 0)) == DImode
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (x, 1));
+    }
+  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
+  else if ((GET_CODE (x) == SIGN_EXTRACT
+            || GET_CODE (x) == ZERO_EXTRACT)
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == MULT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      type = (GET_CODE (x) == SIGN_EXTRACT)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
+      if (INTVAL (XEXP (x, 1)) != 32 + shift
+          || INTVAL (XEXP (x, 2)) != 0)
+        shift = -1;
+    }
+  /* (and:DI (mult:DI (reg:DI) (const_int scale))
+     (const_int 0xffffffff<<shift)) */
+  else if (GET_CODE (x) == AND
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == MULT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
+      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
+        shift = -1;
+    }
+  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
+  else if ((GET_CODE (x) == SIGN_EXTRACT
+            || GET_CODE (x) == ZERO_EXTRACT)
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == ASHIFT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      type = (GET_CODE (x) == SIGN_EXTRACT)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (XEXP (x, 0), 1));
+      if (INTVAL (XEXP (x, 1)) != 32 + shift
+          || INTVAL (XEXP (x, 2)) != 0)
+        shift = -1;
+    }
+  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
+     (const_int 0xffffffff<<shift)) */
+  else if (GET_CODE (x) == AND
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == ASHIFT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (XEXP (x, 0), 1));
+      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
+        shift = -1;
+    }
+  /* (mult:P (reg:P) (const_int scale)) */
+  else if (GET_CODE (x) == MULT
+           && GET_MODE (x) == Pmode
+           && GET_MODE (XEXP (x, 0)) == Pmode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_REG;
+      index = XEXP (x, 0);
+      shift = exact_log2 (INTVAL (XEXP (x, 1)));
+    }
+  /* (ashift:P (reg:P) (const_int shift)) */
+  else if (GET_CODE (x) == ASHIFT
+           && GET_MODE (x) == Pmode
+           && GET_MODE (XEXP (x, 0)) == Pmode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_REG;
+      index = XEXP (x, 0);
+      shift = INTVAL (XEXP (x, 1));
+    }
+  else
+    return false;
+
+  if (GET_CODE (index) == SUBREG)
+    index = SUBREG_REG (index);
+
+  if ((shift == 0 ||
+       (shift > 0 && shift <= 3
+        && (1 << shift) == GET_MODE_SIZE (mode)))
+      && REG_P (index)
+      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
+    {
+      info->type = type;
+      info->offset = index;
+      info->shift = shift;
+      return true;
+    }
+
+  return false;
+}
+
+static inline bool
+offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= -64 * GET_MODE_SIZE (mode)
+          && offset < 64 * GET_MODE_SIZE (mode)
+          && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+static inline bool
+offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+                               HOST_WIDE_INT offset)
+{
+  return offset >= -256 && offset < 256;
+}
+
+static inline bool
+offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= 0
+          && offset < 4096 * GET_MODE_SIZE (mode)
+          && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+/* Return true if X is a valid address for machine mode MODE. If it is,
+   fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
+   effect. OUTER_CODE is PARALLEL for a load/store pair. */
+
+static bool
+aarch64_classify_address (struct aarch64_address_info *info,
+                          rtx x, enum machine_mode mode,
+                          RTX_CODE outer_code, bool strict_p)
+{
+  enum rtx_code code = GET_CODE (x);
+  rtx op0, op1;
+  bool allow_reg_index_p =
+    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
+
+  /* Don't support anything other than POST_INC or REG addressing for
+     AdvSIMD. */
+  if (aarch64_vector_mode_p (mode)
+      && (code != POST_INC && code != REG))
+    return false;
+
+  switch (code)
+    {
+    case REG:
+    case SUBREG:
+      info->type = ADDRESS_REG_IMM;
+      info->base = x;
+      info->offset = const0_rtx;
+      return aarch64_base_register_rtx_p (x, strict_p);
+
+    case PLUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      if (GET_MODE_SIZE (mode) != 0
+          && CONST_INT_P (op1)
+          && aarch64_base_register_rtx_p (op0, strict_p))
+        {
+          HOST_WIDE_INT offset = INTVAL (op1);
+
+          info->type = ADDRESS_REG_IMM;
+          info->base = op0;
+          info->offset = op1;
+
+          /* TImode and TFmode values are allowed in both pairs of X
+             registers and individual Q registers. The available
+             address modes are:
+             X,X: 7-bit signed scaled offset
+             Q:   9-bit signed offset
+             We conservatively require an offset representable in either mode.
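+             For example, with TImode an offset of 240 is accepted (a
+             multiple of 16 within [-256, 256)), whereas 512 fails the
+             9-bit test and 8 fails the scaled 7-bit test.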
+ */ + if (mode == TImode || mode == TFmode) + return (offset_7bit_signed_scaled_p (mode, offset) + && offset_9bit_signed_unscaled_p (mode, offset)); + + if (outer_code == PARALLEL) + return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + && offset_7bit_signed_scaled_p (mode, offset)); + else + return (offset_9bit_signed_unscaled_p (mode, offset) + || offset_12bit_unsigned_scaled_p (mode, offset)); + } + + if (allow_reg_index_p) + { + /* Look for base + (scaled/extended) index register. */ + if (aarch64_base_register_rtx_p (op0, strict_p) + && aarch64_classify_index (info, op1, mode, strict_p)) + { + info->base = op0; + return true; + } + if (aarch64_base_register_rtx_p (op1, strict_p) + && aarch64_classify_index (info, op0, mode, strict_p)) + { + info->base = op1; + return true; + } + } + + return false; + + case POST_INC: + case POST_DEC: + case PRE_INC: + case PRE_DEC: + info->type = ADDRESS_REG_WB; + info->base = XEXP (x, 0); + info->offset = NULL_RTX; + return aarch64_base_register_rtx_p (info->base, strict_p); + + case POST_MODIFY: + case PRE_MODIFY: + info->type = ADDRESS_REG_WB; + info->base = XEXP (x, 0); + if (GET_CODE (XEXP (x, 1)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base) + && aarch64_base_register_rtx_p (info->base, strict_p)) + { + HOST_WIDE_INT offset; + info->offset = XEXP (XEXP (x, 1), 1); + offset = INTVAL (info->offset); + + /* TImode and TFmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset + Q: 9-bit signed offset + We conservatively require an offset representable in either mode. + */ + if (mode == TImode || mode == TFmode) + return (offset_7bit_signed_scaled_p (mode, offset) + && offset_9bit_signed_unscaled_p (mode, offset)); + + if (outer_code == PARALLEL) + return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + && offset_7bit_signed_scaled_p (mode, offset)); + else + return offset_9bit_signed_unscaled_p (mode, offset); + } + return false; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* load literal: pc-relative constant pool entry. Only supported + for SI mode or larger. */ + info->type = ADDRESS_SYMBOLIC; + if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4) + { + rtx sym, addend; + + split_const (x, &sym, &addend); + return (GET_CODE (sym) == LABEL_REF + || (GET_CODE (sym) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (sym))); + } + return false; + + case LO_SUM: + info->type = ADDRESS_LO_SUM; + info->base = XEXP (x, 0); + info->offset = XEXP (x, 1); + if (allow_reg_index_p + && aarch64_base_register_rtx_p (info->base, strict_p)) + { + rtx sym, offs; + split_const (info->offset, &sym, &offs); + if (GET_CODE (sym) == SYMBOL_REF + && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM) + == SYMBOL_SMALL_ABSOLUTE)) + { + /* The symbol and offset must be aligned to the access size. 
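+ The low 12 bits of the address are encoded, after scaling, in the unsigned immediate field of the load or store, so they must be a multiple of the access size.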
*/ + unsigned int align; + unsigned int ref_size; + + if (CONSTANT_POOL_ADDRESS_P (sym)) + align = GET_MODE_ALIGNMENT (get_pool_mode (sym)); + else if (TREE_CONSTANT_POOL_ADDRESS_P (sym)) + { + tree exp = SYMBOL_REF_DECL (sym); + align = TYPE_ALIGN (TREE_TYPE (exp)); + align = CONSTANT_ALIGNMENT (exp, align); + } + else if (SYMBOL_REF_DECL (sym)) + align = DECL_ALIGN (SYMBOL_REF_DECL (sym)); + else + align = BITS_PER_UNIT; + + ref_size = GET_MODE_SIZE (mode); + if (ref_size == 0) + ref_size = GET_MODE_SIZE (DImode); + + return ((INTVAL (offs) & (ref_size - 1)) == 0 + && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0); + } + } + return false; + + default: + return false; + } +} + +bool +aarch64_symbolic_address_p (rtx x) +{ + rtx offset; + + split_const (x, &x, &offset); + return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; +} + +/* Classify the base of symbolic expression X, given that X appears in + context CONTEXT. */ + +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx x, + enum aarch64_symbol_context context) +{ + rtx offset; + + split_const (x, &x, &offset); + return aarch64_classify_symbol (x, context); +} + + +/* Return TRUE if X is a legitimate address for accessing memory in + mode MODE. */ +static bool +aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p) +{ + struct aarch64_address_info addr; + + return aarch64_classify_address (&addr, x, mode, MEM, strict_p); +} + +/* Return TRUE if X is a legitimate address for accessing memory in + mode MODE. OUTER_CODE will be PARALLEL if this is a load/store + pair operation. */ +bool +aarch64_legitimate_address_p (enum machine_mode mode, rtx x, + RTX_CODE outer_code, bool strict_p) +{ + struct aarch64_address_info addr; + + return aarch64_classify_address (&addr, x, mode, outer_code, strict_p); +} + +/* Return TRUE if rtx X is immediate constant 0.0 */ +bool +aarch64_float_const_zero_rtx_p (rtx x) +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (x) == VOIDmode) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_MINUS_ZERO (r)) + return !HONOR_SIGNED_ZEROS (GET_MODE (x)); + return REAL_VALUES_EQUAL (r, dconst0); +} + +/* Return the fixed registers used for condition codes. */ + +static bool +aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = CC_REGNUM; + *p2 = INVALID_REGNUM; + return true; +} + +enum machine_mode +aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) +{ + /* All floating point compares return CCFP if it is an equality + comparison, and CCFPE otherwise. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (code) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && y == const0_rtx + && (code == EQ || code == NE || code == LT || code == GE) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND + || GET_CODE (x) == NEG)) + return CC_NZmode; + + /* A compare with a shifted operand. Because of canonicalization, + the comparison will have to be swapped when we emit the assembly + code. 
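+ The CC_SWP family of modes records this so that aarch64_get_condition_code can return the condition appropriate to the swapped operands.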
*/ + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) + && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == LSHIFTRT + || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) + return CC_SWPmode; + + /* Similarly for a negated operand, but we can only do this for + equalities. */ + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) + && (code == EQ || code == NE) + && GET_CODE (x) == NEG) + return CC_Zmode; + + /* A compare of a mode narrower than SI mode against zero can be done + by extending the value in the comparison. */ + if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode) + && y == const0_rtx) + /* Only use sign-extension if we really need it. */ + return ((code == GT || code == GE || code == LE || code == LT) + ? CC_SESWPmode : CC_ZESWPmode); + + /* For everything else, return CCmode. */ + return CCmode; +} + +static unsigned +aarch64_get_condition_code (rtx x) +{ + enum machine_mode mode = GET_MODE (XEXP (x, 0)); + enum rtx_code comp_code = GET_CODE (x); + + if (GET_MODE_CLASS (mode) != MODE_CC) + mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1)); + + switch (mode) + { + case CCFPmode: + case CCFPEmode: + switch (comp_code) + { + case GE: return AARCH64_GE; + case GT: return AARCH64_GT; + case LE: return AARCH64_LS; + case LT: return AARCH64_MI; + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case ORDERED: return AARCH64_VC; + case UNORDERED: return AARCH64_VS; + case UNLT: return AARCH64_LT; + case UNLE: return AARCH64_LE; + case UNGT: return AARCH64_HI; + case UNGE: return AARCH64_PL; + default: gcc_unreachable (); + } + break; + + case CCmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_GE; + case GT: return AARCH64_GT; + case LE: return AARCH64_LE; + case LT: return AARCH64_LT; + case GEU: return AARCH64_CS; + case GTU: return AARCH64_HI; + case LEU: return AARCH64_LS; + case LTU: return AARCH64_CC; + default: gcc_unreachable (); + } + break; + + case CC_SWPmode: + case CC_ZESWPmode: + case CC_SESWPmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_LE; + case GT: return AARCH64_LT; + case LE: return AARCH64_GE; + case LT: return AARCH64_GT; + case GEU: return AARCH64_LS; + case GTU: return AARCH64_CC; + case LEU: return AARCH64_CS; + case LTU: return AARCH64_HI; + default: gcc_unreachable (); + } + break; + + case CC_NZmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_PL; + case LT: return AARCH64_MI; + default: gcc_unreachable (); + } + break; + + case CC_Zmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + default: gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + break; + } +} + +static unsigned +bit_count (unsigned HOST_WIDE_INT value) +{ + unsigned count = 0; + + while (value) + { + count++; + value &= value - 1; + } + + return count; +} + +void +aarch64_print_operand (FILE *f, rtx x, char code) +{ + switch (code) + { + /* An integer or symbol address without a preceding # sign. 
*/ + case 'c': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (f, x); + break; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) + { + output_addr_const (f, x); + break; + } + /* Fall through. */ + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + break; + + case 'e': + /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */ + { + int n; + + if (GET_CODE (x) != CONST_INT + || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + switch (n) + { + case 3: + fputc ('b', f); + break; + case 4: + fputc ('h', f); + break; + case 5: + fputc ('w', f); + break; + default: + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + } + break; + + case 'p': + { + int n; + + /* Print N such that 2^N == X. */ + if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%d", n); + } + break; + + case 'P': + /* Print the number of non-zero bits in X (a const_int). */ + if (GET_CODE (x) != CONST_INT) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%u", bit_count (INTVAL (x))); + break; + + case 'H': + /* Print the higher numbered register of a pair (TImode) of regs. */ + if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); + break; + + case 'm': + /* Print a condition (eq, ne, etc). */ + + /* CONST_TRUE_RTX means always -- that's the default. */ + if (x == const_true_rtx) + return; + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f); + break; + + case 'M': + /* Print the inverse of a condition (eq <-> ne, etc). */ + + /* CONST_TRUE_RTX means never -- that's the default. */ + if (x == const_true_rtx) + { + fputs ("nv", f); + return; + } + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE + (aarch64_get_condition_code (x))], f); + break; + + case 'b': + case 'h': + case 's': + case 'd': + case 'q': + /* Print a scalar FP/SIMD register name. */ + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + return; + } + asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM); + break; + + case 'S': + case 'T': + case 'U': + case 'V': + /* Print the first FP/SIMD register name in a list. */ + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + return; + } + asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S')); + break; + + case 'X': + /* Print bottom 16 bits of integer constant in hex. */ + if (GET_CODE (x) != CONST_INT) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff); + break; + + case 'w': + case 'x': + /* Print a general register name or the zero register (32-bit or + 64-bit). 
*/ + if (x == const0_rtx + || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) + { + asm_fprintf (f, "%czr", code); + break; + } + + if (REG_P (x) && GP_REGNUM_P (REGNO (x))) + { + asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM); + break; + } + + if (REG_P (x) && REGNO (x) == SP_REGNUM) + { + asm_fprintf (f, "%ssp", code == 'w' ? "w" : ""); + break; + } + + /* Fall through */ + + case 0: + /* Print a normal operand, if it's a general register, then we + assume DImode. */ + if (x == NULL) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + asm_fprintf (f, "%s", reg_names [REGNO (x)]); + break; + + case MEM: + aarch64_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + break; + + case LABEL_REF: + case SYMBOL_REF: + output_addr_const (asm_out_file, x); + break; + + case CONST_INT: + asm_fprintf (f, "%wd", INTVAL (x)); + break; + + case CONST_VECTOR: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) + { + gcc_assert (aarch64_const_vec_all_same_int_p (x, + HOST_WIDE_INT_MIN, + HOST_WIDE_INT_MAX)); + asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); + } + else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) + { + fputc ('0', f); + } + else + gcc_unreachable (); + break; + + case CONST_DOUBLE: + /* CONST_DOUBLE can represent a double-width integer. + In this case, the mode of x is VOIDmode. */ + if (GET_MODE (x) == VOIDmode) + ; /* Do Nothing. */ + else if (aarch64_float_const_zero_rtx_p (x)) + { + fputc ('0', f); + break; + } + else if (aarch64_float_const_representable_p (x)) + { +#define buf_size 20 + char float_buf[buf_size] = {'\0'}; + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + real_to_decimal_for_mode (float_buf, &r, + buf_size, buf_size, + 1, GET_MODE (x)); + asm_fprintf (asm_out_file, "%s", float_buf); + break; +#undef buf_size + } + output_operand_lossage ("invalid constant"); + return; + default: + output_operand_lossage ("invalid operand"); + return; + } + break; + + case 'A': + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_GOT: + asm_fprintf (asm_out_file, ":got:"); + break; + + case SYMBOL_SMALL_TLSGD: + asm_fprintf (asm_out_file, ":tlsgd:"); + break; + + case SYMBOL_SMALL_TLSDESC: + asm_fprintf (asm_out_file, ":tlsdesc:"); + break; + + case SYMBOL_SMALL_GOTTPREL: + asm_fprintf (asm_out_file, ":gottprel:"); + break; + + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel:"); + break; + + case SYMBOL_TINY_GOT: + gcc_unreachable (); + break; + + default: + break; + } + output_addr_const (asm_out_file, x); + break; + + case 'L': + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_GOT: + asm_fprintf (asm_out_file, ":lo12:"); + break; + + case SYMBOL_SMALL_TLSGD: + asm_fprintf (asm_out_file, ":tlsgd_lo12:"); + break; + + case SYMBOL_SMALL_TLSDESC: + asm_fprintf (asm_out_file, ":tlsdesc_lo12:"); + break; + + case SYMBOL_SMALL_GOTTPREL: + asm_fprintf (asm_out_file, ":gottprel_lo12:"); + break; + + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel_lo12_nc:"); + break; + + case SYMBOL_TINY_GOT: + asm_fprintf (asm_out_file, ":got:"); + break; + + default: + break; + } + output_addr_const (asm_out_file, x); + break; + + case 'G': + + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel_hi12:"); + break; + default: + break; + } + 
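+ /* Print the operand itself after any relocation operator emitted above. */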
output_addr_const (asm_out_file, x); + break; + + default: + output_operand_lossage ("invalid operand prefix '%%%c'", code); + return; + } +} + +void +aarch64_print_operand_address (FILE *f, rtx x) +{ + struct aarch64_address_info addr; + + if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode, + MEM, true)) + switch (addr.type) + { + case ADDRESS_REG_IMM: + if (addr.offset == const0_rtx) + asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); + else + asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + + case ADDRESS_REG_REG: + if (addr.shift == 0) + asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)]); + else + asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)], addr.shift); + return; + + case ADDRESS_REG_UXTW: + if (addr.shift == 0) + asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else + asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + + case ADDRESS_REG_SXTW: + if (addr.shift == 0) + asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else + asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + + case ADDRESS_REG_WB: + switch (GET_CODE (x)) + { + case PRE_INC: + asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_INC: + asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_DEC: + asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_DEC: + asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_MODIFY: + asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + case POST_MODIFY: + asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + default: + break; + } + break; + + case ADDRESS_LO_SUM: + asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); + output_addr_const (f, addr.offset); + asm_fprintf (f, "]"); + return; + + case ADDRESS_SYMBOLIC: + break; + } + + output_addr_const (f, x); +} + +bool +aarch64_label_mentioned_p (rtx x) +{ + const char *fmt; + int i; + + if (GET_CODE (x) == LABEL_REF) + return true; + + /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the + referencing instruction, but they are constant offsets, not + symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return false; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (aarch64_label_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +/* Implement REGNO_REG_CLASS. 
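+ Return a minimal register class containing hard register REGNO.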
*/ + +enum reg_class +aarch64_regno_regclass (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return CORE_REGS; + + if (regno == SP_REGNUM) + return STACK_REG; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return POINTER_REGS; + + if (FP_REGNUM_P (regno)) + return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; + + return NO_REGS; +} + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +aarch64_legitimize_reload_address (rtx *x_p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + rtx x = *x_p; + + /* Do not allow mem (plus (reg, const)) if vector mode. */ + if (aarch64_vector_mode_p (mode) + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + rtx orig_rtx = x; + x = copy_rtx (x); + push_reload (orig_rtx, NULL_RTX, x_p, NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by splitting + the addend across an add and the mem insn. This can cut the number of + extra insns needed from 3 to 1. It is only useful for load/store of a + single register with 12 bit offset field. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && HARD_REGISTER_P (XEXP (x, 0)) + && mode != TImode + && mode != TFmode + && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true)) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = val & 0xfff; + HOST_WIDE_INT high = val - low; + HOST_WIDE_INT offs; + rtx cst; + enum machine_mode xmode = GET_MODE (x); + + /* In ILP32, xmode can be either DImode or SImode. */ + gcc_assert (xmode == DImode || xmode == SImode); + + /* Reload non-zero BLKmode offsets. This is because we cannot ascertain + BLKmode alignment. */ + if (GET_MODE_SIZE (mode) == 0) + return NULL_RTX; + + offs = low % GET_MODE_SIZE (mode); + + /* Align misaligned offset by adjusting high part to compensate. */ + if (offs != 0) + { + if (aarch64_uimm12_shift (high + offs)) + { + /* Align down. */ + low = low - offs; + high = high + offs; + } + else + { + /* Align up. */ + offs = GET_MODE_SIZE (mode) - offs; + low = low + offs; + high = high + (low & 0x1000) - offs; + low &= 0xfff; + } + } + + /* Check for overflow. */ + if (high + low != val) + return NULL_RTX; + + cst = GEN_INT (high); + if (!aarch64_uimm12_shift (high)) + cst = force_const_mem (xmode, cst); + + /* Reload high part into base reg, leaving the low part + in the mem instruction. + Note that replacing this gen_rtx_PLUS with plus_constant is + wrong in this case because we rely on the + (plus (plus reg c1) c2) structure being preserved so that + XEXP (*p, 0) in push_reload below uses the correct term. 
*/ + x = gen_rtx_PLUS (xmode, + gen_rtx_PLUS (xmode, XEXP (x, 0), cst), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, xmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + + +static reg_class_t +aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + reg_class_t rclass, + enum machine_mode mode, + secondary_reload_info *sri) +{ + /* Without the TARGET_SIMD instructions we cannot move a Q register + to a Q register directly. We need a scratch. */ + if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) + && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD + && reg_class_subset_p (rclass, FP_REGS)) + { + if (mode == TFmode) + sri->icode = CODE_FOR_aarch64_reload_movtf; + else if (mode == TImode) + sri->icode = CODE_FOR_aarch64_reload_movti; + return NO_REGS; + } + + /* A TFmode or TImode memory access should be handled via an FP_REGS + because AArch64 has richer addressing modes for LDR/STR instructions + than LDP/STP instructions. */ + if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS + && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) + return FP_REGS; + + if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) + return CORE_REGS; + + return NO_REGS; +} + +static bool +aarch64_can_eliminate (const int from, const int to) +{ + /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + + if (frame_pointer_needed) + { + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return true; + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return false; + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM + && !cfun->calls_alloca) + return true; + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return true; + return false; + } + else + { + /* If we decided that we didn't need a leaf frame pointer but then used + LR in the function, then we'll want a frame pointer after all, so + prevent this elimination to ensure a frame pointer is used. + + NOTE: the original value of flag_omit_frame_pointer gets trashed + IFF flag_omit_leaf_frame_pointer is true, so we check the value + of faked_omit_frame_pointer here (which is true when we always + wish to keep non-leaf frame pointers but only wish to keep leaf frame + pointers when LR is clobbered). 
*/ + if (to == STACK_POINTER_REGNUM + && df_regs_ever_live_p (LR_REGNUM) + && faked_omit_frame_pointer) + return false; + } + + return true; +} + +HOST_WIDE_INT +aarch64_initial_elimination_offset (unsigned from, unsigned to) +{ + HOST_WIDE_INT frame_size; + HOST_WIDE_INT offset; + + aarch64_layout_frame (); + frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size + + cfun->machine->saved_varargs_size); + + frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); + offset = frame_size; + + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + return offset - crtl->outgoing_args_size; + + if (from == FRAME_POINTER_REGNUM) + return cfun->machine->frame.saved_regs_size + get_frame_size (); + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT elim = crtl->outgoing_args_size + + cfun->machine->frame.saved_regs_size + + get_frame_size () + - cfun->machine->frame.fp_lr_offset; + elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); + return elim; + } + } + + return offset; +} + + +/* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. */ + +rtx +aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + return get_hard_reg_initial_val (Pmode, LR_REGNUM); +} + + +static void +aarch64_asm_trampoline_template (FILE *f) +{ + if (TARGET_ILP32) + { + asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM); + asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM); + } + else + { + asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]); + asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]); + } + asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]); + assemble_aligned_integer (4, const0_rtx); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); +} + +static void +aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, a_tramp; + const int tramp_code_sz = 16; + + /* Don't need to copy the trailing D-words, we fill those in below. */ + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL); + mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz); + fnaddr = XEXP (DECL_RTL (fndecl), 0); + if (GET_MODE (fnaddr) != ptr_mode) + fnaddr = convert_memory_address (ptr_mode, fnaddr); + emit_move_insn (mem, fnaddr); + + mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES); + emit_move_insn (mem, chain_value); + + /* XXX We should really define a "clear_cache" pattern and use + gen_clear_cache(). */ + a_tramp = XEXP (m_tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode, + plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE), + ptr_mode); +} + +static unsigned char +aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode) +{ + switch (regclass) + { + case CORE_REGS: + case POINTER_REGS: + case GENERAL_REGS: + case ALL_REGS: + case FP_REGS: + case FP_LO_REGS: + return + aarch64_vector_mode_p (mode) ? 
(GET_MODE_SIZE (mode) + 15) / 16 : + (GET_MODE_SIZE (mode) + 7) / 8; + case STACK_REG: + return 1; + + case NO_REGS: + return 0; + + default: + break; + } + gcc_unreachable (); +} + +static reg_class_t +aarch64_preferred_reload_class (rtx x, reg_class_t regclass) +{ + if (regclass == POINTER_REGS) + return GENERAL_REGS; + + if (regclass == STACK_REG) + { + if (REG_P(x) + && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS)) + return regclass; + + return NO_REGS; + } + + /* If it's an integer immediate that MOVI can't handle, then + FP_REGS is not an option, so we return NO_REGS instead. */ + if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) + && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) + return NO_REGS; + + /* Register elimination can result in a request for + SP+constant->FP_REGS. We cannot support such operations which + use SP as source and an FP_REG as destination, so reject it + right now. */ + if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS) + { + rtx lhs = XEXP (x, 0); + + /* Look through a possible SUBREG introduced by ILP32. */ + if (GET_CODE (lhs) == SUBREG) + lhs = SUBREG_REG (lhs); + + gcc_assert (REG_P (lhs)); + gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)), + POINTER_REGS)); + return NO_REGS; + } + + return regclass; +} + +void +aarch64_asm_output_labelref (FILE* f, const char *name) +{ + asm_fprintf (f, "%U%s", name); +} + +static void +aarch64_elf_asm_constructor (rtx symbol, int priority) +{ + if (priority == DEFAULT_INIT_PRIORITY) + default_ctor_section_asm_out_constructor (symbol, priority); + else + { + section *s; + char buf[18]; + snprintf (buf, sizeof (buf), ".init_array.%.5u", priority); + s = get_section (buf, SECTION_WRITE, NULL); + switch_to_section (s); + assemble_align (POINTER_SIZE); + assemble_aligned_integer (POINTER_BYTES, symbol); + } +} + +static void +aarch64_elf_asm_destructor (rtx symbol, int priority) +{ + if (priority == DEFAULT_INIT_PRIORITY) + default_dtor_section_asm_out_destructor (symbol, priority); + else + { + section *s; + char buf[18]; + snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority); + s = get_section (buf, SECTION_WRITE, NULL); + switch_to_section (s); + assemble_align (POINTER_SIZE); + assemble_aligned_integer (POINTER_BYTES, symbol); + } +} + +const char* +aarch64_output_casesi (rtx *operands) +{ + char buf[100]; + char label[100]; + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + int index; + static const char *const patterns[4][2] = + { + { + "ldrb\t%w3, [%0,%w1,uxtw]", + "add\t%3, %4, %w3, sxtb #2" + }, + { + "ldrh\t%w3, [%0,%w1,uxtw #1]", + "add\t%3, %4, %w3, sxth #2" + }, + { + "ldr\t%w3, [%0,%w1,uxtw #2]", + "add\t%3, %4, %w3, sxtw #2" + }, + /* We assume that DImode is only generated when not optimizing and + that we don't really need 64-bit address offsets. That would + imply an object file with 8GB of code in a single function! */ + { + "ldr\t%w3, [%0,%w1,uxtw #2]", + "add\t%3, %4, %w3, sxtw #2" + } + }; + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec))); + + gcc_assert (index >= 0 && index <= 3); + + /* Need to implement table size reduction, by changing the code below. 
*/ + output_asm_insn (patterns[index][0], operands); + ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2])); + snprintf (buf, sizeof (buf), + "adr\t%%4, %s", targetm.strip_name_encoding (label)); + output_asm_insn (buf, operands); + output_asm_insn (patterns[index][1], operands); + output_asm_insn ("br\t%3", operands); + assemble_label (asm_out_file, label); + return ""; +} + + +/* Return size in bits of an arithmetic operand which is shifted/scaled and + masked such that it is suitable for a UXTB, UXTH, or UXTW extend + operator. */ + +int +aarch64_uxt_size (int shift, HOST_WIDE_INT mask) +{ + if (shift >= 0 && shift <= 3) + { + int size; + for (size = 8; size <= 32; size *= 2) + { + HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1; + if (mask == bits << shift) + return size; + } + } + return 0; +} + +static bool +aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + const_rtx x ATTRIBUTE_UNUSED) +{ + /* We can't use blocks for constants when we're using a per-function + constant pool. */ + return false; +} + +static section * +aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + /* Force all constant pool entries into the current function section. */ + return function_section (current_function_decl); +} + + +/* Costs. */ + +/* Helper function for rtx cost calculation. Strip a shift expression + from X. Returns the inner operand if successful, or the original + expression on failure. */ +static rtx +aarch64_strip_shift (rtx x) +{ + rtx op = x; + + if ((GET_CODE (op) == ASHIFT + || GET_CODE (op) == ASHIFTRT + || GET_CODE (op) == LSHIFTRT) + && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); + + if (GET_CODE (op) == MULT + && CONST_INT_P (XEXP (op, 1)) + && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64) + return XEXP (op, 0); + + return x; +} + +/* Helper function for rtx cost calculation. Strip a shift or extend + expression from X. Returns the inner operand if successful, or the + original expression on failure. We deal with a number of possible + canonicalization variations here. */ +static rtx +aarch64_strip_shift_or_extend (rtx x) +{ + rtx op = x; + + /* Zero and sign extraction of a widened value. */ + if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) + && XEXP (op, 2) == const0_rtx + && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), + XEXP (op, 1))) + return XEXP (XEXP (op, 0), 0); + + /* It can also be represented (for zero-extend) as an AND with an + immediate. */ + if (GET_CODE (op) == AND + && GET_CODE (XEXP (op, 0)) == MULT + && CONST_INT_P (XEXP (XEXP (op, 0), 1)) + && CONST_INT_P (XEXP (op, 1)) + && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))), + INTVAL (XEXP (op, 1))) != 0) + return XEXP (XEXP (op, 0), 0); + + /* Now handle extended register, as this may also have an optional + left shift by 1..4. */ + if (GET_CODE (op) == ASHIFT + && CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4) + op = XEXP (op, 0); + + if (GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) + op = XEXP (op, 0); + + if (op != x) + return op; + + return aarch64_strip_shift (x); +} + +/* Calculate the cost of calculating X, storing it in *COST. Result + is true if the total cost of the operation has now been calculated. 
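+ A false return value tells the generic rtx_cost machinery to recurse into the operands and add their costs itself.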
*/ +static bool +aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + int param ATTRIBUTE_UNUSED, int *cost, bool speed) +{ + rtx op0, op1; + const struct cpu_cost_table *extra_cost + = aarch64_tune_params->insn_extra_cost; + + switch (code) + { + case SET: + op0 = SET_DEST (x); + op1 = SET_SRC (x); + + switch (GET_CODE (op0)) + { + case MEM: + if (speed) + *cost += extra_cost->ldst.store; + + if (op1 != const0_rtx) + *cost += rtx_cost (op1, SET, 1, speed); + return true; + + case SUBREG: + if (! REG_P (SUBREG_REG (op0))) + *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); + /* Fall through. */ + case REG: + /* Cost is just the cost of the RHS of the set. */ + *cost += rtx_cost (op1, SET, 1, true); + return true; + + case ZERO_EXTRACT: /* Bit-field insertion. */ + case SIGN_EXTRACT: + /* Strip any redundant widening of the RHS to meet the width of + the target. */ + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + if ((GET_CODE (op1) == ZERO_EXTEND + || GET_CODE (op1) == SIGN_EXTEND) + && GET_CODE (XEXP (op0, 1)) == CONST_INT + && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) + >= INTVAL (XEXP (op0, 1)))) + op1 = XEXP (op1, 0); + *cost += rtx_cost (op1, SET, 1, speed); + return true; + + default: + break; + } + return false; + + case MEM: + if (speed) + *cost += extra_cost->ldst.load; + + return true; + + case NEG: + op0 = CONST0_RTX (GET_MODE (x)); + op1 = XEXP (x, 0); + goto cost_minus; + + case COMPARE: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (op1 == const0_rtx + && GET_CODE (op0) == AND) + { + x = op0; + goto cost_logic; + } + + /* Comparisons can work if the order is swapped. + Canonicalization puts the more complex operation first, but + we want it in op1. */ + if (! (REG_P (op0) + || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) + { + op0 = XEXP (x, 1); + op1 = XEXP (x, 0); + } + goto cost_minus; + + case MINUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + cost_minus: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT + || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC + && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) + { + if (op0 != const0_rtx) + *cost += rtx_cost (op0, MINUS, 0, speed); + + if (CONST_INT_P (op1)) + { + if (!aarch64_uimm12_shift (INTVAL (op1))) + *cost += rtx_cost (op1, MINUS, 1, speed); + } + else + { + op1 = aarch64_strip_shift_or_extend (op1); + *cost += rtx_cost (op1, MINUS, 1, speed); + } + return true; + } + + return false; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) + { + *cost += rtx_cost (op0, PLUS, 0, speed); + } + else + { + rtx new_op0 = aarch64_strip_shift_or_extend (op0); + + if (new_op0 == op0 + && GET_CODE (op0) == MULT) + { + if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) + { + *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, + speed) + + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, + speed) + + rtx_cost (op1, PLUS, 1, speed)); + if (speed) + *cost += + extra_cost->mult[GET_MODE (x) == DImode].extend_add; + return true; + } + + *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) + + rtx_cost (XEXP (op0, 1), MULT, 1, speed) + + rtx_cost (op1, PLUS, 1, speed)); + + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].add; + + return true; + } + + *cost += (rtx_cost (new_op0, PLUS, 0, speed) + + rtx_cost (op1, PLUS, 1, speed)); + } + 
return true; + } + + return false; + + case IOR: + case XOR: + case AND: + cost_logic: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) + && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) + { + *cost += rtx_cost (op0, AND, 0, speed); + } + else + { + if (GET_CODE (op0) == NOT) + op0 = XEXP (op0, 0); + op0 = aarch64_strip_shift (op0); + *cost += (rtx_cost (op0, AND, 0, speed) + + rtx_cost (op1, AND, 1, speed)); + } + return true; + } + return false; + + case ZERO_EXTEND: + if ((GET_MODE (x) == DImode + && GET_MODE (XEXP (x, 0)) == SImode) + || GET_CODE (XEXP (x, 0)) == MEM) + { + *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); + return true; + } + return false; + + case SIGN_EXTEND: + if (GET_CODE (XEXP (x, 0)) == MEM) + { + *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); + return true; + } + return false; + + case ROTATE: + if (!CONST_INT_P (XEXP (x, 1))) + *cost += COSTS_N_INSNS (2); + /* Fall through. */ + case ROTATERT: + case LSHIFTRT: + case ASHIFT: + case ASHIFTRT: + + /* Shifting by a register often takes an extra cycle. */ + if (speed && !CONST_INT_P (XEXP (x, 1))) + *cost += extra_cost->alu.arith_shift_reg; + + *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); + return true; + + case HIGH: + if (!CONSTANT_P (XEXP (x, 0))) + *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); + return true; + + case LO_SUM: + if (!CONSTANT_P (XEXP (x, 1))) + *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); + *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); + return true; + + case MULT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + *cost = COSTS_N_INSNS (1); + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) + && exact_log2 (INTVAL (op1)) > 0) + { + *cost += rtx_cost (op0, ASHIFT, 0, speed); + return true; + } + + if ((GET_CODE (op0) == ZERO_EXTEND + && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == SIGN_EXTEND + && GET_CODE (op1) == SIGN_EXTEND)) + { + *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) + + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; + return true; + } + + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; + } + else if (speed) + { + if (GET_MODE (x) == DFmode) + *cost += extra_cost->fp[1].mult; + else if (GET_MODE (x) == SFmode) + *cost += extra_cost->fp[0].mult; + } + + return false; /* All arguments need to be in registers. */ + + case MOD: + case UMOD: + *cost = COSTS_N_INSNS (2); + if (speed) + { + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + + extra_cost->mult[GET_MODE (x) == DImode].idiv); + else if (GET_MODE (x) == DFmode) + *cost += (extra_cost->fp[1].mult + + extra_cost->fp[1].div); + else if (GET_MODE (x) == SFmode) + *cost += (extra_cost->fp[0].mult + + extra_cost->fp[0].div); + } + return false; /* All arguments need to be in registers. */ + + case DIV: + case UDIV: + *cost = COSTS_N_INSNS (1); + if (speed) + { + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; + else if (GET_MODE (x) == DFmode) + *cost += extra_cost->fp[1].div; + else if (GET_MODE (x) == SFmode) + *cost += extra_cost->fp[0].div; + } + return false; /* All arguments need to be in registers. 
*/ + + default: + break; + } + return false; +} + +static int +aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) +{ + enum rtx_code c = GET_CODE (x); + const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; + + if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) + return addr_cost->pre_modify; + + if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) + return addr_cost->post_modify; + + if (c == PLUS) + { + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + return addr_cost->imm_offset; + else if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + return addr_cost->register_extend; + + return addr_cost->register_offset; + } + else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) + return addr_cost->imm_offset; + + return 0; +} + +static int +aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + const struct cpu_regmove_cost *regmove_cost + = aarch64_tune_params->regmove_cost; + + /* Moving between GPR and stack cost is the same as GP2GP. */ + if ((from == GENERAL_REGS && to == STACK_REG) + || (to == GENERAL_REGS && from == STACK_REG)) + return regmove_cost->GP2GP; + + /* To/From the stack register, we move via the gprs. */ + if (to == STACK_REG || from == STACK_REG) + return aarch64_register_move_cost (mode, from, GENERAL_REGS) + + aarch64_register_move_cost (mode, GENERAL_REGS, to); + + if (from == GENERAL_REGS && to == GENERAL_REGS) + return regmove_cost->GP2GP; + else if (from == GENERAL_REGS) + return regmove_cost->GP2FP; + else if (to == GENERAL_REGS) + return regmove_cost->FP2GP; + + /* When AdvSIMD instructions are disabled it is not possible to move + a 128-bit value directly between Q registers. This is handled in + secondary reload. A general register is used as a scratch to move + the upper DI value and the lower DI value is moved directly, + hence the cost is the sum of three moves. */ + + if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128) + return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; + + return regmove_cost->FP2FP; +} + +static int +aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return aarch64_tune_params->memmov_cost; +} + +/* Return the number of instructions that can be issued per cycle. */ +static int +aarch64_sched_issue_rate (void) +{ + return aarch64_tune_params->issue_rate; +} + +/* Vectorizer cost model target hooks. */ + +/* Implement targetm.vectorize.builtin_vectorization_cost. 
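+ Most cases simply return the corresponding entry of the current tuning's vec_costs table.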
*/ +static int +aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return aarch64_tune_params->vec_costs->scalar_stmt_cost; + + case scalar_load: + return aarch64_tune_params->vec_costs->scalar_load_cost; + + case scalar_store: + return aarch64_tune_params->vec_costs->scalar_store_cost; + + case vector_stmt: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vector_load: + return aarch64_tune_params->vec_costs->vec_align_load_cost; + + case vector_store: + return aarch64_tune_params->vec_costs->vec_store_cost; + + case vec_to_scalar: + return aarch64_tune_params->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return aarch64_tune_params->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return aarch64_tune_params->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return aarch64_tune_params->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return aarch64_tune_params->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ +static unsigned +aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = + aarch64_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + a function (linear for now) of the loop nest level. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + { + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + unsigned nest_level = loop_depth (loop); + + count *= nest_level; + } + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} + +static void initialize_aarch64_code_model (void); + +/* Parse the architecture extension string. */ + +static void +aarch64_parse_extension (char *str) +{ + /* The extension string is parsed left to right. */ + const struct aarch64_option_extension *opt = NULL; + + /* Flag to say whether we are adding or removing an extension. */ + int adding_ext = -1; + + while (str != NULL && *str != 0) + { + char *ext; + size_t len; + + str++; + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len >= 2 && strncmp (str, "no", 2) == 0) + { + adding_ext = 0; + len -= 2; + str += 2; + } + else if (len > 0) + adding_ext = 1; + + if (len == 0) + { + error ("missing feature modifier after %qs", "+no"); + return; + } + + /* Scan over the extensions table trying to find an exact match. */ + for (opt = all_extensions; opt->name != NULL; opt++) + { + if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0) + { + /* Add or remove the extension. 
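+ ADDING_EXT is 1 if the modifier enables the extension and 0 if it was prefixed with "no".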
*/ + if (adding_ext) + aarch64_isa_flags |= opt->flags_on; + else + aarch64_isa_flags &= ~(opt->flags_off); + break; + } + } + + if (opt->name == NULL) + { + /* Extension not found in list. */ + error ("unknown feature modifier %qs", str); + return; + } + + str = ext; + }; + + return; +} + +/* Parse the ARCH string. */ + +static void +aarch64_parse_arch (void) +{ + char *ext; + const struct processor *arch; + char *str = (char *) alloca (strlen (aarch64_arch_string) + 1); + size_t len; + + strcpy (str, aarch64_arch_string); + + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len == 0) + { + error ("missing arch name in -march=%qs", str); + return; + } + + /* Loop through the list of supported ARCHs to find a match. */ + for (arch = all_architectures; arch->name != NULL; arch++) + { + if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0) + { + selected_arch = arch; + aarch64_isa_flags = selected_arch->flags; + + if (!selected_cpu) + selected_cpu = &all_cores[selected_arch->core]; + + if (ext != NULL) + { + /* ARCH string contains at least one extension. */ + aarch64_parse_extension (ext); + } + + if (strcmp (selected_arch->arch, selected_cpu->arch)) + { + warning (0, "switch -mcpu=%s conflicts with -march=%s switch", + selected_cpu->name, selected_arch->name); + } + + return; + } + } + + /* ARCH name not found in list. */ + error ("unknown value %qs for -march", str); + return; +} + +/* Parse the CPU string. */ + +static void +aarch64_parse_cpu (void) +{ + char *ext; + const struct processor *cpu; + char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1); + size_t len; + + strcpy (str, aarch64_cpu_string); + + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len == 0) + { + error ("missing cpu name in -mcpu=%qs", str); + return; + } + + /* Loop through the list of supported CPUs to find a match. */ + for (cpu = all_cores; cpu->name != NULL; cpu++) + { + if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) + { + selected_cpu = cpu; + selected_tune = cpu; + aarch64_isa_flags = selected_cpu->flags; + + if (ext != NULL) + { + /* CPU string contains at least one extension. */ + aarch64_parse_extension (ext); + } + + return; + } + } + + /* CPU name not found in list. */ + error ("unknown value %qs for -mcpu", str); + return; +} + +/* Parse the TUNE string. */ + +static void +aarch64_parse_tune (void) +{ + const struct processor *cpu; + char *str = (char *) alloca (strlen (aarch64_tune_string) + 1); + strcpy (str, aarch64_tune_string); + + /* Loop through the list of supported CPUs to find a match. */ + for (cpu = all_cores; cpu->name != NULL; cpu++) + { + if (strcmp (cpu->name, str) == 0) + { + selected_tune = cpu; + return; + } + } + + /* CPU name not found in list. */ + error ("unknown value %qs for -mtune", str); + return; +} + + +/* Implement TARGET_OPTION_OVERRIDE. */ + +static void +aarch64_override_options (void) +{ + /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU. + If either of -march or -mtune is given, they override their + respective component of -mcpu. + + So, first parse AARCH64_CPU_STRING, then the others, be careful + with -march as, if -mcpu is not present on the command line, march + must set a sensible default CPU. 
*/ + if (aarch64_cpu_string) + { + aarch64_parse_cpu (); + } + + if (aarch64_arch_string) + { + aarch64_parse_arch (); + } + + if (aarch64_tune_string) + { + aarch64_parse_tune (); + } + +#ifndef HAVE_AS_MABI_OPTION + /* The compiler may have been configured with 2.23.* binutils, which does + not have support for ILP32. */ + if (TARGET_ILP32) + error ("Assembler does not support -mabi=ilp32"); +#endif + + initialize_aarch64_code_model (); + + aarch64_build_bitmask_table (); + + /* This target defaults to strict volatile bitfields. */ + if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) + flag_strict_volatile_bitfields = 1; + + /* If the user did not specify a processor, choose the default + one for them. This will be the CPU set during configuration using + --with-cpu, otherwise it is "generic". */ + if (!selected_cpu) + { + selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f]; + aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6; + } + + gcc_assert (selected_cpu); + + /* The selected cpu may be an architecture, so lookup tuning by core ID. */ + if (!selected_tune) + selected_tune = &all_cores[selected_cpu->core]; + + aarch64_tune_flags = selected_tune->flags; + aarch64_tune = selected_tune->core; + aarch64_tune_params = selected_tune->tune; + + aarch64_override_options_after_change (); +} + +/* Implement targetm.override_options_after_change. */ + +static void +aarch64_override_options_after_change (void) +{ + faked_omit_frame_pointer = false; + + /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so + that aarch64_frame_pointer_required will be called. We need to remember + whether flag_omit_frame_pointer was turned on normally or just faked. */ + + if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer) + { + flag_omit_frame_pointer = true; + faked_omit_frame_pointer = true; + } +} + +static struct machine_function * +aarch64_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + return machine; +} + +void +aarch64_init_expanders (void) +{ + init_machine_status = aarch64_init_machine_status; +} + +/* A checking mechanism for the implementation of the various code models. */ +static void +initialize_aarch64_code_model (void) +{ + if (flag_pic) + { + switch (aarch64_cmodel_var) + { + case AARCH64_CMODEL_TINY: + aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; + break; + case AARCH64_CMODEL_SMALL: + aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; + break; + case AARCH64_CMODEL_LARGE: + sorry ("code model %qs with -f%s", "large", + flag_pic > 1 ? "PIC" : "pic"); + default: + gcc_unreachable (); + } + } + else + aarch64_cmodel = aarch64_cmodel_var; +} + +/* Return true if SYMBOL_REF X binds locally. */ + +static bool +aarch64_symbol_binds_local_p (const_rtx x) +{ + return (SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x)); +} + +/* Return true if SYMBOL_REF X is thread local */ +static bool +aarch64_tls_symbol_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + if (GET_CODE (x) != SYMBOL_REF) + return false; + + return SYMBOL_REF_TLS_MODEL (x) != 0; +} + +/* Classify a TLS symbol into one of the TLS kinds. */ +enum aarch64_symbol_type +aarch64_classify_tls_symbol (rtx x) +{ + enum tls_model tls_kind = tls_symbolic_operand_type (x); + + switch (tls_kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + return TARGET_TLS_DESC ? 
SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD; + + case TLS_MODEL_INITIAL_EXEC: + return SYMBOL_SMALL_GOTTPREL; + + case TLS_MODEL_LOCAL_EXEC: + return SYMBOL_SMALL_TPREL; + + case TLS_MODEL_EMULATED: + case TLS_MODEL_NONE: + return SYMBOL_FORCE_TO_MEM; + + default: + gcc_unreachable (); + } +} + +/* Return the method that should be used to access SYMBOL_REF or + LABEL_REF X in context CONTEXT. */ + +enum aarch64_symbol_type +aarch64_classify_symbol (rtx x, + enum aarch64_symbol_context context ATTRIBUTE_UNUSED) +{ + if (GET_CODE (x) == LABEL_REF) + { + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_LARGE: + return SYMBOL_FORCE_TO_MEM; + + case AARCH64_CMODEL_TINY_PIC: + case AARCH64_CMODEL_TINY: + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_SMALL: + return SYMBOL_SMALL_ABSOLUTE; + + default: + gcc_unreachable (); + } + } + + if (GET_CODE (x) == SYMBOL_REF) + { + if (aarch64_cmodel == AARCH64_CMODEL_LARGE) + return SYMBOL_FORCE_TO_MEM; + + if (aarch64_tls_symbol_p (x)) + return aarch64_classify_tls_symbol (x); + + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; + + case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_TINY_GOT; + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_SMALL_ABSOLUTE; + + default: + gcc_unreachable (); + } + } + + /* By default push everything into the constant pool. */ + return SYMBOL_FORCE_TO_MEM; +} + +bool +aarch64_constant_address_p (rtx x) +{ + return (CONSTANT_P (x) && memory_address_p (DImode, x)); +} + +bool +aarch64_legitimate_pic_operand_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + return false; + + return true; +} + +/* Return true if X holds either a quarter-precision or + floating-point +0.0 constant. */ +static bool +aarch64_valid_floating_const (enum machine_mode mode, rtx x) +{ + if (!CONST_DOUBLE_P (x)) + return false; + + /* TODO: We could handle moving 0.0 to a TFmode register, + but first we would like to refactor the movtf_aarch64 + to be more amicable to split moves properly and + correctly gate on TARGET_SIMD. For now - reject all + constants which are not to SFmode or DFmode registers. */ + if (!(mode == SFmode || mode == DFmode)) + return false; + + if (aarch64_float_const_zero_rtx_p (x)) + return true; + return aarch64_float_const_representable_p (x); +} + +static bool +aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + /* Do not allow vector struct mode constants. We could support + 0 and -1 easily, but they need support in aarch64-simd.md. */ + if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) + return false; + + /* This could probably go away because + we now decompose CONST_INTs according to expand_mov_immediate. 
*/ + if ((GET_CODE (x) == CONST_VECTOR + && aarch64_simd_valid_immediate (x, mode, false, NULL)) + || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) + return !targetm.cannot_force_const_mem (mode, x); + + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + return aarch64_constant_address_p (x); +} + +rtx +aarch64_load_tp (rtx target) +{ + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + /* Can return in any reg. */ + emit_insn (gen_aarch64_load_tp_hard (target)); + return target; +} + +/* On AAPCS systems, this is the "struct __va_list". */ +static GTY(()) tree va_list_type; + +/* Implement TARGET_BUILD_BUILTIN_VA_LIST. + Return the type to use as __builtin_va_list. + + AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as: + + struct __va_list + { + void *__stack; + void *__gr_top; + void *__vr_top; + int __gr_offs; + int __vr_offs; + }; */ + +static tree +aarch64_build_builtin_va_list (void) +{ + tree va_list_name; + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + + /* Create the type. */ + va_list_type = lang_hooks.types.make_type (RECORD_TYPE); + /* Give it the required name. */ + va_list_name = build_decl (BUILTINS_LOCATION, + TYPE_DECL, + get_identifier ("__va_list"), + va_list_type); + DECL_ARTIFICIAL (va_list_name) = 1; + TYPE_NAME (va_list_type) = va_list_name; + TYPE_STUB_DECL (va_list_type) = va_list_name; + + /* Create the fields. */ + f_stack = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__stack"), + ptr_type_node); + f_grtop = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__gr_top"), + ptr_type_node); + f_vrtop = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__vr_top"), + ptr_type_node); + f_groff = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__gr_offs"), + integer_type_node); + f_vroff = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__vr_offs"), + integer_type_node); + + DECL_ARTIFICIAL (f_stack) = 1; + DECL_ARTIFICIAL (f_grtop) = 1; + DECL_ARTIFICIAL (f_vrtop) = 1; + DECL_ARTIFICIAL (f_groff) = 1; + DECL_ARTIFICIAL (f_vroff) = 1; + + DECL_FIELD_CONTEXT (f_stack) = va_list_type; + DECL_FIELD_CONTEXT (f_grtop) = va_list_type; + DECL_FIELD_CONTEXT (f_vrtop) = va_list_type; + DECL_FIELD_CONTEXT (f_groff) = va_list_type; + DECL_FIELD_CONTEXT (f_vroff) = va_list_type; + + TYPE_FIELDS (va_list_type) = f_stack; + DECL_CHAIN (f_stack) = f_grtop; + DECL_CHAIN (f_grtop) = f_vrtop; + DECL_CHAIN (f_vrtop) = f_groff; + DECL_CHAIN (f_groff) = f_vroff; + + /* Compute its layout. */ + layout_type (va_list_type); + + return va_list_type; +} + +/* Implement TARGET_EXPAND_BUILTIN_VA_START. 
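+ As a worked illustration of the code below: for a hypothetical variadic
+ function int f (int n, ...) whose named arguments occupy one general
+ register and no FP/SIMD registers, va_start leaves __gr_offs at -56 (seven
+ unsaved 8-byte GP argument registers) and __vr_offs at -128 (eight 16-byte
+ vector registers), with __gr_top and __vr_top pointing just past the
+ corresponding register save areas and __stack at the next stacked argument.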
*/ +static void +aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + const CUMULATIVE_ARGS *cum; + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + tree stack, grtop, vrtop, groff, vroff; + tree t; + int gr_save_area_size; + int vr_save_area_size; + int vr_offset; + + cum = &crtl->args.info; + gr_save_area_size + = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; + vr_save_area_size + = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; + + if (TARGET_GENERAL_REGS_ONLY) + { + if (cum->aapcs_nvrn > 0) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + vr_save_area_size = 0; + } + + f_stack = TYPE_FIELDS (va_list_type_node); + f_grtop = DECL_CHAIN (f_stack); + f_vrtop = DECL_CHAIN (f_grtop); + f_groff = DECL_CHAIN (f_vrtop); + f_vroff = DECL_CHAIN (f_groff); + + stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack, + NULL_TREE); + grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop, + NULL_TREE); + vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop, + NULL_TREE); + groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff, + NULL_TREE); + vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff, + NULL_TREE); + + /* Emit code to initialize STACK, which points to the next varargs stack + argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used + by named arguments. STACK is 8-byte aligned. */ + t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); + if (cum->aapcs_stack_size > 0) + t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize GRTOP, the top of the GR save area. + virtual_incoming_args_rtx should have been 16 byte aligned. */ + t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx); + t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize VRTOP, the top of the VR save area. + This address is gr_save_area_bytes below GRTOP, rounded + down to the next 16-byte boundary. */ + t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx); + vr_offset = AARCH64_ROUND_UP (gr_save_area_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + if (vr_offset) + t = fold_build_pointer_plus_hwi (t, -vr_offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize GROFF, the offset from GRTOP of the + next GPR argument. */ + t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff, + build_int_cst (TREE_TYPE (groff), -gr_save_area_size)); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Likewise emit code to initialize VROFF, the offset from FTOP + of the next VR argument. */ + t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff, + build_int_cst (TREE_TYPE (vroff), -vr_save_area_size)); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ + +static tree +aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p ATTRIBUTE_UNUSED) +{ + tree addr; + bool indirect_p; + bool is_ha; /* is HFA or HVA. */ + bool dw_align; /* double-word align. 
*/ + enum machine_mode ag_mode = VOIDmode; + int nregs; + enum machine_mode mode; + + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + tree stack, f_top, f_off, off, arg, roundup, on_stack; + HOST_WIDE_INT size, rsize, adjust, align; + tree t, u, cond1, cond2; + + indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect_p) + type = build_pointer_type (type); + + mode = TYPE_MODE (type); + + f_stack = TYPE_FIELDS (va_list_type_node); + f_grtop = DECL_CHAIN (f_stack); + f_vrtop = DECL_CHAIN (f_grtop); + f_groff = DECL_CHAIN (f_vrtop); + f_vroff = DECL_CHAIN (f_groff); + + stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist), + f_stack, NULL_TREE); + size = int_size_in_bytes (type); + align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT; + + dw_align = false; + adjust = 0; + if (aarch64_vfp_is_call_or_return_candidate (mode, + type, + &ag_mode, + &nregs, + &is_ha)) + { + /* TYPE passed in fp/simd registers. */ + if (TARGET_GENERAL_REGS_ONLY) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), + unshare_expr (valist), f_vrtop, NULL_TREE); + f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), + unshare_expr (valist), f_vroff, NULL_TREE); + + rsize = nregs * UNITS_PER_VREG; + + if (is_ha) + { + if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG) + adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode); + } + else if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_VREG) + { + adjust = UNITS_PER_VREG - size; + } + } + else + { + /* TYPE passed in general registers. */ + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), + unshare_expr (valist), f_grtop, NULL_TREE); + f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff), + unshare_expr (valist), f_groff, NULL_TREE); + rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; + nregs = rsize / UNITS_PER_WORD; + + if (align > 8) + dw_align = true; + + if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_WORD) + { + adjust = UNITS_PER_WORD - size; + } + } + + /* Get a local temporary for the field value. */ + off = get_initialized_tmp_var (f_off, pre_p, NULL); + + /* Emit code to branch if off >= 0. */ + t = build2 (GE_EXPR, boolean_type_node, off, + build_int_cst (TREE_TYPE (off), 0)); + cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); + + if (dw_align) + { + /* Emit: offs = (offs + 15) & -16. */ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), 15)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, + build_int_cst (TREE_TYPE (off), -16)); + roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); + } + else + roundup = NULL; + + /* Update ap.__[g|v]r_offs */ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), rsize)); + t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); + + /* String up. */ + if (roundup) + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); + + /* [cond2] if (ap.__[g|v]r_offs > 0) */ + u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), + build_int_cst (TREE_TYPE (f_off), 0)); + cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); + + /* String up: make sure the assignment happens before the use. */ + t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); + COND_EXPR_ELSE (cond1) = t; + + /* Prepare the trees handling the argument that is passed on the stack; + the top level node will store in ON_STACK. 
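+ Schematically the trees built below amount to: arg = ap.__stack; if the
+ type needs 16-byte alignment, arg = (arg + 15) & -16; then
+ ap.__stack = (arg + size + 7) & -8; ON_STACK evaluates to ARG, adjusted
+ upwards on big-endian targets when the value is smaller than a word.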
*/ + arg = get_initialized_tmp_var (stack, pre_p, NULL); + if (align > 8) + { + /* if (alignof(type) > 8) (arg = arg + 15) & -16; */ + t = fold_convert (intDI_type_node, arg); + t = build2 (PLUS_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), 15)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -16)); + t = fold_convert (TREE_TYPE (arg), t); + roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t); + } + else + roundup = NULL; + /* Advance ap.__stack */ + t = fold_convert (intDI_type_node, arg); + t = build2 (PLUS_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), size + 7)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -8)); + t = fold_convert (TREE_TYPE (arg), t); + t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t); + /* String up roundup and advance. */ + if (roundup) + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); + /* String up with arg */ + on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg); + /* Big-endianness related address adjustment. */ + if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_WORD) + { + t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg, + size_int (UNITS_PER_WORD - size)); + on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t); + } + + COND_EXPR_THEN (cond1) = unshare_expr (on_stack); + COND_EXPR_THEN (cond2) = unshare_expr (on_stack); + + /* Adjustment to OFFSET in the case of BIG_ENDIAN. */ + t = off; + if (adjust) + t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), adjust)); + + t = fold_convert (sizetype, t); + t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t); + + if (is_ha) + { + /* type ha; // treat as "struct {ftype field[n];}" + ... [computing offs] + for (i = 0; i 0) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + vr_saved = 0; + } + + if (!no_rtl) + { + if (gr_saved > 0) + { + rtx ptr, mem; + + /* virtual_incoming_args_rtx should have been 16-byte aligned. */ + ptr = plus_constant (Pmode, virtual_incoming_args_rtx, + - gr_saved * UNITS_PER_WORD); + mem = gen_frame_mem (BLKmode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + + move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, + mem, gr_saved); + } + if (vr_saved > 0) + { + /* We can't use move_block_from_reg, because it will use + the wrong mode, storing D regs only. */ + enum machine_mode mode = TImode; + int off, i; + + /* Set OFF to the offset from virtual_incoming_args_rtx of + the first vector register. The VR save area lies below + the GR one, and is aligned to 16 bytes. */ + off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, + STACK_BOUNDARY / BITS_PER_UNIT); + off -= vr_saved * UNITS_PER_VREG; + + for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) + { + rtx ptr, mem; + + ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); + mem = gen_frame_mem (mode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); + off += UNITS_PER_VREG; + } + } + } + + /* We don't save the size into *PRETEND_SIZE because we want to avoid + any complication of having crtl->args.pretend_args_size changed. 
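+ The combined size of the two dump areas is recorded below in
+ cfun->machine->saved_varargs_size instead; e.g. with one general register
+ and no FP/SIMD registers used by named arguments this is
+ AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 = 192 bytes.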
*/ + cfun->machine->saved_varargs_size + = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, + STACK_BOUNDARY / BITS_PER_UNIT) + + vr_saved * UNITS_PER_VREG); +} + +static void +aarch64_conditional_register_usage (void) +{ + int i; + if (!TARGET_FLOAT) + { + for (i = V0_REGNUM; i <= V31_REGNUM; i++) + { + fixed_regs[i] = 1; + call_used_regs[i] = 1; + } + } +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree. */ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode && mode != TFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode && mode != TFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. 
*/ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if we use LRA instead of reload pass. */ +static bool +aarch64_lra_p (void) +{ + return aarch64_lra_flag; +} + +/* Return TRUE if the type, as described by TYPE and MODE, is a composite + type as described in AAPCS64 \S 4.3. This includes aggregate, union and + array types. The C99 floating-point complex types are also considered + as composite types, according to AAPCS64 \S 7.1.1. The complex integer + types, which are GCC extensions and out of the scope of AAPCS64, are + treated as composite types here as well. + + Note that MODE itself is not sufficient in determining whether a type + is such a composite type or not. This is because + stor-layout.c:compute_record_mode may have already changed the MODE + (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a + structure with only one field may have its MODE set to the mode of the + field. Also an integer mode whose size matches the size of the + RECORD_TYPE type may be used to substitute the original mode + (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be + solely relied on. */ + +static bool +aarch64_composite_type_p (const_tree type, + enum machine_mode mode) +{ + if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) + return true; + + if (mode == BLKmode + || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) + return true; + + return false; +} + +/* Return TRUE if the type, as described by TYPE and MODE, is a short vector + type as described in AAPCS64 \S 4.1.2. + + See the comment above aarch64_composite_type_p for the notes on MODE. */ + +static bool +aarch64_short_vector_p (const_tree type, + enum machine_mode mode) +{ + HOST_WIDE_INT size = -1; + + if (type && TREE_CODE (type) == VECTOR_TYPE) + size = int_size_in_bytes (type); + else if (!aarch64_composite_type_p (type, mode) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)) + size = GET_MODE_SIZE (mode); + + return (size == 8 || size == 16) ? true : false; +} + +/* Return TRUE if an argument, whose type is described by TYPE and MODE, + shall be passed or returned in simd/fp register(s) (providing these + parameter passing registers are available). + + Upon successful return, *COUNT returns the number of needed registers, + *BASE_MODE returns the mode of the individual register and when IS_HAF + is not NULL, *IS_HA indicates whether or not the argument is a homogeneous + floating-point aggregate or a homogeneous short-vector aggregate. 
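+ For example, a struct of three floats is a homogeneous floating-point
+ aggregate under AAPCS64: on success *COUNT is 3, *BASE_MODE is SFmode and
+ *IS_HA is true, so the value may be passed in three consecutive FP/SIMD
+ registers; a struct mixing a float and a double is rejected.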
*/ + +static bool +aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode, + const_tree type, + enum machine_mode *base_mode, + int *count, + bool *is_ha) +{ + enum machine_mode new_mode = VOIDmode; + bool composite_p = aarch64_composite_type_p (type, mode); + + if (is_ha != NULL) *is_ha = false; + + if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) + || aarch64_short_vector_p (type, mode)) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + if (is_ha != NULL) *is_ha = true; + *count = 2; + new_mode = GET_MODE_INNER (mode); + } + else if (type && composite_p) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS) + { + if (is_ha != NULL) *is_ha = true; + *count = ag_count; + } + else + return false; + } + else + return false; + + *base_mode = new_mode; + return true; +} + +/* Implement TARGET_STRUCT_VALUE_RTX. */ + +static rtx +aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM); +} + +/* Implements target hook vector_mode_supported_p. */ +static bool +aarch64_vector_mode_supported_p (enum machine_mode mode) +{ + if (TARGET_SIMD + && (mode == V4SImode || mode == V8HImode + || mode == V16QImode || mode == V2DImode + || mode == V2SImode || mode == V4HImode + || mode == V8QImode || mode == V2SFmode + || mode == V4SFmode || mode == V2DFmode)) + return true; + + return false; +} + +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. */ +static enum machine_mode +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) +{ + gcc_assert (width == 64 || width == 128); + if (TARGET_SIMD) + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } + return word_mode; +} + +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + +/* Return the bitmask of possible vector sizes for the vectorizer + to iterate over. */ +static unsigned int +aarch64_autovectorize_vector_sizes (void) +{ + return (16 | 8); +} + +/* A table to help perform AArch64-specific name mangling for AdvSIMD + vector types in order to conform to the AAPCS64 (see "Procedure + Call Standard for the ARM 64-bit Architecture", Appendix A). To + qualify for emission with the mangled names defined in that document, + a vector type must not only be of the correct mode but also be + composed of AdvSIMD vector element types (e.g. + _builtin_aarch64_simd_qi); these types are registered by + aarch64_init_simd_builtins (). In other words, vector types defined + in other ways e.g. via vector_size attribute will get default + mangled names. */ +typedef struct +{ + enum machine_mode mode; + const char *element_type_name; + const char *mangled_name; +} aarch64_simd_mangle_map_entry; + +static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = { + /* 64-bit containerized types. 
*/ + { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" }, + { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" }, + { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" }, + { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" }, + { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" }, + { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" }, + { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" }, + { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" }, + { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" }, + /* 128-bit containerized types. */ + { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" }, + { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" }, + { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" }, + { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" }, + { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" }, + { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" }, + { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" }, + { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" }, + { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" }, + { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" }, + { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" }, + { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" }, + { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" }, + { VOIDmode, NULL, NULL } +}; + +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +aarch64_mangle_type (const_tree type) +{ + /* The AArch64 ABI documents say that "__va_list" has to be + managled as if it is in the "std" namespace. */ + if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + return "St9__va_list"; + + /* Check the mode of the vector type, and the name of the vector + element type, against the table. */ + if (TREE_CODE (type) == VECTOR_TYPE) + { + aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map; + + while (pos->mode != VOIDmode) + { + tree elt_type = TREE_TYPE (type); + + if (pos->mode == TYPE_MODE (type) + && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL + && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), + pos->element_type_name)) + return pos->mangled_name; + + pos++; + } + } + + /* Use the default mangling. */ + return NULL; +} + +/* Return the equivalent letter for size. */ +static char +sizetochar (int size) +{ + switch (size) + { + case 64: return 'd'; + case 32: return 's'; + case 16: return 'h'; + case 8 : return 'b'; + default: gcc_unreachable (); + } +} + +/* Return true iff x is a uniform vector of floating-point + constants, and the constant can be represented in + quarter-precision form. Note, as aarch64_float_const_representable + rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */ +static bool +aarch64_vect_float_const_representable_p (rtx x) +{ + int i = 0; + REAL_VALUE_TYPE r0, ri; + rtx x0, xi; + + if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT) + return false; + + x0 = CONST_VECTOR_ELT (x, 0); + if (!CONST_DOUBLE_P (x0)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, x0); + + for (i = 1; i < CONST_VECTOR_NUNITS (x); i++) + { + xi = CONST_VECTOR_ELT (x, i); + if (!CONST_DOUBLE_P (xi)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (ri, xi); + if (!REAL_VALUES_EQUAL (r0, ri)) + return false; + } + + return aarch64_float_const_representable_p (x0); +} + +/* Return true for valid and false for invalid. 
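+ For example, a V4SImode vector with every element equal to 0x45 is accepted
+ (the unshifted 32-bit MOVI form), whereas 0x12345678 replicated across a
+ V4SImode vector matches none of the patterns and is rejected. When INFO is
+ non-null it is filled in with the element width, shift and MVN/MSL flags
+ needed to emit the instruction.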
*/ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + eshift = (SHIFT); \ + emvn = (NEG); \ + break; \ + } + + unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + int eshift, emvn; + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + if (! (aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; + + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } + + return true; + } + + /* Splat vector constant out into a byte vector. */ + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (op, i); + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (GET_CODE (el) == CONST_INT) + { + elpart = INTVAL (el); + parts = 1; + } + else if (GET_CODE (el) == CONST_DOUBLE) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (GET_CODE (el) == CONST_DOUBLE) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + + CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); + + 
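+ /* The 64-bit case below is the byte-mask form: every byte of the element
+ must be either 0x00 or 0xff (e.g. 0xff00ff00ff00ff00), as used by MOVI
+ with a 64-bit element; the immtype 17 handling further down reconstructs
+ the immediate from those per-byte flags. */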
CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx], 0, 0); + } + while (0); + + if (immtype == -1) + return false; + + if (info) + { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + + unsigned HOST_WIDE_INT imm = 0; + + if (immtype >= 12 && immtype <= 15) + info->msl = true; + + /* Un-invert bytes of recognized vector, if necessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + /* Construct 'abcdefgh' because the assembler cannot handle + generic constants. */ + if (info->mvn) + imm = ~imm; + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } + } + + return true; +#undef CHECK +} + +static bool +aarch64_const_vec_all_same_int_p (rtx x, + HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + HOST_WIDE_INT firstval; + int count, i; + + if (GET_CODE (x) != CONST_VECTOR + || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) + return false; + + firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); + if (firstval < minval || firstval > maxval) + return false; + + count = CONST_VECTOR_NUNITS (x); + for (i = 1; i < count; i++) + if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) + return false; + + return true; +} + +/* Check of immediate shift constants are within range. */ +bool +aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + if (left) + return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); + else + return aarch64_const_vec_all_same_int_p (x, 1, bit_width); +} + +/* Return true if X is a uniform vector where all elements + are either the floating-point constant 0.0 or the + integer constant 0. */ +bool +aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode) +{ + return x == CONST0_RTX (mode); +} + +bool +aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT imm = INTVAL (x); + int i; + + for (i = 0; i < 8; i++) + { + unsigned int byte = imm & 0xff; + if (byte != 0xff && byte != 0) + return false; + imm >>= 8; + } + + return true; +} + +bool +aarch64_mov_operand_p (rtx x, + enum aarch64_symbol_context context, + enum machine_mode mode) +{ + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) + return true; + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + + return aarch64_classify_symbolic_expression (x, context) + == SYMBOL_TINY_ABSOLUTE; +} + +/* Return a const_int vector of VAL. */ +rtx +aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + int i; + + for (i=0; i < nunits; i++) + RTVEC_ELT (v, i) = GEN_INT (val); + + return gen_rtx_CONST_VECTOR (mode, v); +} + +/* Check OP is a legal scalar immediate for the MOVI instruction. 
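+ The scalar is broadcast into the preferred vector container for its mode
+ and the result is checked with aarch64_simd_valid_immediate; e.g. 0x2a in
+ QImode is accepted because a V16QImode vector of 0x2a is a valid MOVI
+ immediate.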
*/ + +bool +aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_preferred_simd_mode (mode); + rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); +} + +/* Construct and return a PARALLEL RTX vector. */ +rtx +aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits / 2); + int base = high ? nunits / 2 : 0; + rtx t1; + int i; + + for (i=0; i < nunits / 2; i++) + RTVEC_ELT (v, i) = GEN_INT (base + i); + + t1 = gen_rtx_PARALLEL (mode, v); + return t1; +} + +/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and + HIGH (exclusive). */ +void +aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + HOST_WIDE_INT lane; + gcc_assert (GET_CODE (operand) == CONST_INT); + lane = INTVAL (operand); + + if (lane < low || lane >= high) + error ("lane out of range"); +} + +void +aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + gcc_assert (GET_CODE (operand) == CONST_INT); + HOST_WIDE_INT lane = INTVAL (operand); + + if (lane < low || lane >= high) + error ("constant out of range"); +} + +/* Emit code to reinterpret one AdvSIMD type as another, + without altering bits. */ +void +aarch64_simd_reinterpret (rtx dest, rtx src) +{ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); +} + +/* Emit code to place a AdvSIMD pair result in memory locations (with equal + registers). */ +void +aarch64_simd_emit_pair_result_insn (enum machine_mode mode, + rtx (*intfn) (rtx, rtx, rtx), rtx destaddr, + rtx op1) +{ + rtx mem = gen_rtx_MEM (mode, destaddr); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (intfn (tmp1, op1, tmp2)); + + emit_move_insn (mem, tmp1); + mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); + emit_move_insn (mem, tmp2); +} + +/* Return TRUE if OP is a valid vector addressing mode. */ +bool +aarch64_simd_mem_operand_p (rtx op) +{ + return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG); +} + +/* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + + We assume that the operands described by SRC and DEST represent a + decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the + number of components into which the copy has been decomposed. */ +void +aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest, + rtx *src, unsigned int count) +{ + unsigned int i; + + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) < REGNO (operands[1])) + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[i]; + operands[2 * i + 1] = src[i]; + } + } + else + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[count - i - 1]; + operands[2 * i + 1] = src[count - i - 1]; + } + } +} + +/* Compute and return the length of aarch64_simd_mov, where is + one of VSTRUCT modes: OI, CI or XI. 
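+ A register-to-register copy needs one 4-byte instruction per 128-bit
+ vector register, giving 8, 12 and 16 bytes for OImode, CImode and XImode
+ respectively; every other alternative is a single 4-byte instruction.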
*/ +int +aarch64_simd_attr_length_move (rtx insn) +{ + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + return 4; +} + +/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum + alignment of a vector to 128 bits. */ +static HOST_WIDE_INT +aarch64_simd_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); + return MIN (align, 128); +} + +/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */ +static bool +aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) +{ + if (is_packed) + return false; + + /* We guarantee alignment for vectors up to 128-bits. */ + if (tree_int_cst_compare (TYPE_SIZE (type), + bitsize_int (BIGGEST_ALIGNMENT)) > 0) + return false; + + /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ + return true; +} + +/* If VALS is a vector constant that can be loaded into a register + using DUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ +static rtx +aarch64_simd_dup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR) + return NULL_RTX; + + for (i = 1; i < n_elts; ++i) + { + x = CONST_VECTOR_ELT (vals, i); + if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0))) + all_same = false; + } + + if (!all_same) + return NULL_RTX; + + /* We can load this constant by using DUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ +static rtx +aarch64_simd_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx const_dup; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. */ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL_RTX + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) + /* Load using MOVI/MVNI. */ + return const_vec; + else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) + /* Loaded using DUP. */ + return const_dup; + else if (const_vec != NULL_RTX) + /* Load from constant pool. We can not take advantage of single-cycle + LD1 because we need a PC-relative addressing mode. 
*/ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +void +aarch64_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + x = XVECEXP (vals, 0, 0); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + n_var = 1, one_var = 0; + + for (i = 1; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + ++n_var, one_var = i; + + if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = aarch64_simd_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + enum insn_code icode; + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); + aarch64_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + icode = optab_handler (vec_set_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + emit_insn (GEN_FCN (icode) (target, x, index)); + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); + +} + +static unsigned HOST_WIDE_INT +aarch64_shift_truncation_mask (enum machine_mode mode) +{ + return + (aarch64_vector_mode_supported_p (mode) + || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1); +} + +#ifndef TLS_SECTION_ASM_FLAG +#define TLS_SECTION_ASM_FLAG 'T' +#endif + +void +aarch64_elf_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + char flagchars[10], *f = flagchars; + + /* If we have already declared this section, we can use an + abbreviated form to switch back to it -- unless this section is + part of a COMDAT groups, in which case GAS requires the full + declaration every time. 
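+ For example, switching back to an already-declared .text.unlikely section
+ emits just .section .text.unlikely, whereas a COMDAT text section for a
+ hypothetical function foo () is always emitted in full, roughly
+ .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat (the exact type
+ prefix depends on TYPE_OPERAND_FMT).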
*/ + if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + && (flags & SECTION_DECLARED)) + { + fprintf (asm_out_file, "\t.section\t%s\n", name); + return; + } + + if (!(flags & SECTION_DEBUG)) + *f++ = 'a'; + if (flags & SECTION_WRITE) + *f++ = 'w'; + if (flags & SECTION_CODE) + *f++ = 'x'; + if (flags & SECTION_SMALL) + *f++ = 's'; + if (flags & SECTION_MERGE) + *f++ = 'M'; + if (flags & SECTION_STRINGS) + *f++ = 'S'; + if (flags & SECTION_TLS) + *f++ = TLS_SECTION_ASM_FLAG; + if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + *f++ = 'G'; + *f = '\0'; + + fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars); + + if (!(flags & SECTION_NOTYPE)) + { + const char *type; + const char *format; + + if (flags & SECTION_BSS) + type = "nobits"; + else + type = "progbits"; + +#ifdef TYPE_OPERAND_FMT + format = "," TYPE_OPERAND_FMT; +#else + format = ",@%s"; +#endif + + fprintf (asm_out_file, format, type); + + if (flags & SECTION_ENTSIZE) + fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE); + if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + { + if (TREE_CODE (decl) == IDENTIFIER_NODE) + fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl)); + else + fprintf (asm_out_file, ",%s,comdat", + IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl))); + } + } + + putc ('\n', asm_out_file); +} + +/* Select a format to encode pointers in exception handling data. */ +int +aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) +{ + int type; + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + case AARCH64_CMODEL_TINY_PIC: + case AARCH64_CMODEL_SMALL: + case AARCH64_CMODEL_SMALL_PIC: + /* text+got+data < 4Gb. 4-byte signed relocs are sufficient + for everything. */ + type = DW_EH_PE_sdata4; + break; + default: + /* No assumptions here. 8-byte relocs required. */ + type = DW_EH_PE_sdata8; + break; + } + return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; +} + +/* Emit load exclusive. */ + +static void +aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval, + rtx mem, rtx model_rtx) +{ + rtx (*gen) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_aarch64_load_exclusiveqi; break; + case HImode: gen = gen_aarch64_load_exclusivehi; break; + case SImode: gen = gen_aarch64_load_exclusivesi; break; + case DImode: gen = gen_aarch64_load_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, model_rtx)); +} + +/* Emit store exclusive. */ + +static void +aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval, + rtx rval, rtx mem, rtx model_rtx) +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_aarch64_store_exclusiveqi; break; + case HImode: gen = gen_aarch64_store_exclusivehi; break; + case SImode: gen = gen_aarch64_store_exclusivesi; break; + case DImode: gen = gen_aarch64_store_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (bval, rval, mem, model_rtx)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +aarch64_emit_unlikely_jump (rtx insn) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + + insn = emit_jump_insn (insn); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. 
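+ After the later split, the emitted sequence is roughly: loop: load
+ exclusive RVAL from MEM; compare RVAL with OLDVAL and branch to done if
+ they differ; store exclusive NEWVAL to MEM with the result in SCRATCH; for
+ a strong compare-and-swap, branch back to loop if the store exclusive
+ failed; done: the condition flags then indicate whether the swap succeeded.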
*/ + +void +aarch64_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + enum machine_mode mode, cmp_mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + cmp_mode = mode; + + /* Normally the succ memory model must be stronger than fail, but in the + unlikely event of fail being ACQUIRE and succ being RELEASE we need to + promote succ to ACQ_REL so that we don't lose the acquire semantics. */ + + if (INTVAL (mod_f) == MEMMODEL_ACQUIRE + && INTVAL (mod_s) == MEMMODEL_RELEASE) + mod_s = GEN_INT (MEMMODEL_ACQ_REL); + + switch (mode) + { + case QImode: + case HImode: + /* For short modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + cmp_mode = SImode; + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* Fall through. */ + + case SImode: + case DImode: + /* Force the value into a register if needed. */ + if (!aarch64_plus_operand (oldval, mode)) + oldval = force_reg (cmp_mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); + + x = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); +} + +/* Split a compare and swap pattern. */ + +void +aarch64_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval, scratch; + enum machine_mode mode; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + scratch = operands[7]; + mode = GET_MODE (mem); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + aarch64_emit_load_exclusive (mode, rval, mem, operands[5]); + + cond = aarch64_gen_compare_reg (NE, rval, oldval); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + else + { + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + } + + emit_label (label2); +} + +/* Split an atomic operation. */ + +void +aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? 
DImode : SImode); + rtx label, x; + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* Fall through. */ + + default: + x = gen_rtx_fmt_ee (code, wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + } + + aarch64_emit_store_exclusive (mode, cond, mem, + gen_lowpart (mode, new_out), model_rtx); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); +} + +static void +aarch64_print_extension (void) +{ + const struct aarch64_option_extension *opt = NULL; + + for (opt = all_extensions; opt->name != NULL; opt++) + if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on) + asm_fprintf (asm_out_file, "+%s", opt->name); + + asm_fprintf (asm_out_file, "\n"); +} + +static void +aarch64_start_file (void) +{ + if (selected_arch) + { + asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name); + aarch64_print_extension (); + } + else if (selected_cpu) + { + const char *truncated_name + = aarch64_rewrite_selected_cpu (selected_cpu->name); + asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name); + aarch64_print_extension (); + } + default_file_start(); +} + +/* Target hook for c_mode_for_suffix. */ +static enum machine_mode +aarch64_c_mode_for_suffix (char suffix) +{ + if (suffix == 'q') + return TFmode; + + return VOIDmode; +} + +/* We can only represent floating point constants which will fit in + "quarter-precision" values. These values are characterised by + a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given + by: + + (-1)^s * (n/16) * 2^r + + Where: + 's' is the sign bit. + 'n' is an integer in the range 16 <= n <= 31. + 'r' is an integer in the range -3 <= r <= 4. */ + +/* Return true iff X can be represented by a quarter-precision + floating point immediate operand X. Note, we cannot represent 0.0. */ +bool +aarch64_float_const_representable_p (rtx x) +{ + /* This represents our current view of how many bits + make up the mantissa. */ + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + int exponent; + unsigned HOST_WIDE_INT mantissa, mask; + HOST_WIDE_INT m1, m2; + REAL_VALUE_TYPE r, m; + + if (!CONST_DOUBLE_P (x)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We cannot represent infinities, NaNs or +/-zero. We won't + know if we have +zero until we analyse the mantissa, but we + can reject the other invalid values. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) + || REAL_VALUE_MINUS_ZERO (r)) + return false; + + /* Extract exponent. */ + r = real_value_abs (&r); + exponent = REAL_EXP (&r); + + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + m1 holds the low part of the mantissa, m2 the high part. 
+ WARNING: If we ever have a representation using more than 2 * H_W_I - 1 + bits for the mantissa, this can fail (low bits will be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&m1, &m2, m); + + /* If the low part of the mantissa has bits set we cannot represent + the value. */ + if (m1 != 0) + return false; + /* We have rejected the lower HOST_WIDE_INT, so update our + understanding of how many bits lie in the mantissa and + look only at the high HOST_WIDE_INT. */ + mantissa = m2; + point_pos -= HOST_BITS_PER_WIDE_INT; + + /* We can only represent values with a mantissa of the form 1.xxxx. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return false; + + /* Having filtered unrepresentable values, we may now remove all + but the highest 5 bits. */ + mantissa >>= point_pos - 5; + + /* We cannot represent the value 0.0, so reject it. This is handled + elsewhere. */ + if (mantissa == 0) + return false; + + /* Then, as bit 4 is always set, we can mask it off, leaving + the mantissa in the range [0, 15]. */ + mantissa &= ~(1 << 4); + gcc_assert (mantissa <= 15); + + /* GCC internally does not use IEEE754-like encoding (where normalized + significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c). + Our mantissa values are shifted 4 places to the left relative to + normalized IEEE754 so we must modify the exponent returned by REAL_EXP + by 5 places to correct for GCC's representation. */ + exponent = 5 - exponent; + + return (exponent >= 0 && exponent <= 7); +} + +char* +aarch64_output_simd_mov_immediate (rtx const_vector, + enum machine_mode mode, + unsigned width) +{ + bool is_valid; + static char templ[40]; + const char *mnemonic; + const char *shift_op; + unsigned int lane_count = 0; + char element_char; + + struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; + + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); + gcc_assert (is_valid); + + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + + mode = GET_MODE_INNER (mode); + if (mode == SFmode || mode == DFmode) + { + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } + } + + mnemonic = info.mvn ? "mvni" : "movi"; + shift_op = info.msl ? 
"msl" : "lsl"; + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", %s %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), shift_op, info.shift); + else + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); + return templ; +} + +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + +/* Split operands into moves from op[1] + op[2] into op[0]. */ + +void +aarch64_split_combinev16qi (rtx operands[3]) +{ + unsigned int dest = REGNO (operands[0]); + unsigned int src1 = REGNO (operands[1]); + unsigned int src2 = REGNO (operands[2]); + enum machine_mode halfmode = GET_MODE (operands[1]); + unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); + rtx destlo, desthi; + + gcc_assert (halfmode == V16QImode); + + if (src1 == dest && src2 == dest + halfregs) + { + /* No-op move. Can't split to nothing; emit something. */ + emit_note (NOTE_INSN_DELETED); + return; + } + + /* Preserve register attributes for variable tracking. */ + destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); + desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, + GET_MODE_SIZE (halfmode)); + + /* Special case of reversed high/low parts. */ + if (reg_overlap_mentioned_p (operands[2], destlo) + && reg_overlap_mentioned_p (operands[1], desthi)) + { + emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); + emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2])); + emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); + } + else if (!reg_overlap_mentioned_p (operands[2], destlo)) + { + /* Try to avoid unnecessary moves if part of the result + is in the right place already. */ + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + } + else + { + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + } +} + +/* vec_perm support. */ + +#define MAX_VECT_LEN 16 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Generate a variable permutation. */ + +static void +aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + bool one_vector_p = rtx_equal_p (op0, op1); + + gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); + gcc_checking_assert (GET_MODE (op0) == vmode); + gcc_checking_assert (GET_MODE (op1) == vmode); + gcc_checking_assert (GET_MODE (sel) == vmode); + gcc_checking_assert (TARGET_SIMD); + + if (one_vector_p) + { + if (vmode == V8QImode) + { + /* Expand the argument to a V16QI mode by duplicating it. 
*/ + rtx pair = gen_reg_rtx (V16QImode); + emit_insn (gen_aarch64_combinev8qi (pair, op0, op0)); + emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + } + else + { + emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel)); + } + } + else + { + rtx pair; + + if (vmode == V8QImode) + { + pair = gen_reg_rtx (V16QImode); + emit_insn (gen_aarch64_combinev8qi (pair, op0, op1)); + emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + } + else + { + pair = gen_reg_rtx (OImode); + emit_insn (gen_aarch64_combinev16qi (pair, op0, op1)); + emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel)); + } + } +} + +void +aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool one_vector_p = rtx_equal_p (op0, op1); + rtx rmask[MAX_VECT_LEN], mask; + + gcc_checking_assert (!BYTES_BIG_ENDIAN); + + /* The TBL instruction does not use a modulo index, so we must take care + of that ourselves. */ + mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); + for (i = 0; i < nelt; ++i) + rmask[i] = mask; + mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); + sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); + + aarch64_expand_vec_perm_1 (target, op0, op1, sel); +} + +/* Recognize patterns suitable for the TRN instructions. */ +static bool +aarch64_evpc_trn (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i += 2) + { + if (d->perm[i] != i + odd) + return false; + if (d->perm[i + 1] != ((i + nelt + odd) & mask)) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn2v16qi; break; + case V8QImode: gen = gen_aarch64_trn2v8qi; break; + case V8HImode: gen = gen_aarch64_trn2v8hi; break; + case V4HImode: gen = gen_aarch64_trn2v4hi; break; + case V4SImode: gen = gen_aarch64_trn2v4si; break; + case V2SImode: gen = gen_aarch64_trn2v2si; break; + case V2DImode: gen = gen_aarch64_trn2v2di; break; + case V4SFmode: gen = gen_aarch64_trn2v4sf; break; + case V2SFmode: gen = gen_aarch64_trn2v2sf; break; + case V2DFmode: gen = gen_aarch64_trn2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn1v16qi; break; + case V8QImode: gen = gen_aarch64_trn1v8qi; break; + case V8HImode: gen = gen_aarch64_trn1v8hi; break; + case V4HImode: gen = gen_aarch64_trn1v4hi; break; + case V4SImode: gen = gen_aarch64_trn1v4si; break; + case V2SImode: gen = gen_aarch64_trn1v2si; break; + case V2DImode: gen = gen_aarch64_trn1v2di; break; + case V4SFmode: gen = gen_aarch64_trn1v4sf; break; + case V2SFmode: gen = gen_aarch64_trn1v2sf; break; + case V2DFmode: gen = gen_aarch64_trn1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the UZP instructions. 
*/ +static bool +aarch64_evpc_uzp (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i++) + { + unsigned elt = (i * 2 + odd) & mask; + if (d->perm[i] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp2v16qi; break; + case V8QImode: gen = gen_aarch64_uzp2v8qi; break; + case V8HImode: gen = gen_aarch64_uzp2v8hi; break; + case V4HImode: gen = gen_aarch64_uzp2v4hi; break; + case V4SImode: gen = gen_aarch64_uzp2v4si; break; + case V2SImode: gen = gen_aarch64_uzp2v2si; break; + case V2DImode: gen = gen_aarch64_uzp2v2di; break; + case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp1v16qi; break; + case V8QImode: gen = gen_aarch64_uzp1v8qi; break; + case V8HImode: gen = gen_aarch64_uzp1v8hi; break; + case V4HImode: gen = gen_aarch64_uzp1v4hi; break; + case V4SImode: gen = gen_aarch64_uzp1v4si; break; + case V2SImode: gen = gen_aarch64_uzp1v2si; break; + case V2DImode: gen = gen_aarch64_uzp1v2di; break; + case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the ZIP instructions. */ +static bool +aarch64_evpc_zip (struct expand_vec_perm_d *d) +{ + unsigned int i, high, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + high = nelt / 2; + if (d->perm[0] == high) + /* Do Nothing. */ + ; + else if (d->perm[0] == 0) + high = 0; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt / 2; i++) + { + unsigned elt = (i + high) & mask; + if (d->perm[i * 2] != elt) + return false; + elt = (elt + nelt) & mask; + if (d->perm[i * 2 + 1] != elt) + return false; + } + + /* Success! 
*/ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + high = !high; + } + out = d->target; + + if (high) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip2v16qi; break; + case V8QImode: gen = gen_aarch64_zip2v8qi; break; + case V8HImode: gen = gen_aarch64_zip2v8hi; break; + case V4HImode: gen = gen_aarch64_zip2v4hi; break; + case V4SImode: gen = gen_aarch64_zip2v4si; break; + case V2SImode: gen = gen_aarch64_zip2v2si; break; + case V2DImode: gen = gen_aarch64_zip2v2di; break; + case V4SFmode: gen = gen_aarch64_zip2v4sf; break; + case V2SFmode: gen = gen_aarch64_zip2v2sf; break; + case V2DFmode: gen = gen_aarch64_zip2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip1v16qi; break; + case V8QImode: gen = gen_aarch64_zip1v8qi; break; + case V8HImode: gen = gen_aarch64_zip1v8hi; break; + case V4HImode: gen = gen_aarch64_zip1v4hi; break; + case V4SImode: gen = gen_aarch64_zip1v4si; break; + case V2SImode: gen = gen_aarch64_zip1v2si; break; + case V2DImode: gen = gen_aarch64_zip1v2di; break; + case V4SFmode: gen = gen_aarch64_zip1v4sf; break; + case V2SFmode: gen = gen_aarch64_zip1v2sf; break; + case V2DFmode: gen = gen_aarch64_zip1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +static bool +aarch64_evpc_dup (struct expand_vec_perm_d *d) +{ + rtx (*gen) (rtx, rtx, rtx); + rtx out = d->target; + rtx in0; + enum machine_mode vmode = d->vmode; + unsigned int i, elt, nelt = d->nelt; + rtx lane; + + /* TODO: This may not be big-endian safe. */ + if (BYTES_BIG_ENDIAN) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; i++) + { + if (elt != d->perm[i]) + return false; + } + + /* The generic preparation in aarch64_expand_vec_perm_const_1 + swaps the operand order and the permute indices if it finds + d->perm[0] to be in the second operand. Thus, we can always + use d->op0 and need not do any extra arithmetic to get the + correct lane number. */ + in0 = d->op0; + lane = GEN_INT (elt); + + switch (vmode) + { + case V16QImode: gen = gen_aarch64_dup_lanev16qi; break; + case V8QImode: gen = gen_aarch64_dup_lanev8qi; break; + case V8HImode: gen = gen_aarch64_dup_lanev8hi; break; + case V4HImode: gen = gen_aarch64_dup_lanev4hi; break; + case V4SImode: gen = gen_aarch64_dup_lanev4si; break; + case V2SImode: gen = gen_aarch64_dup_lanev2si; break; + case V2DImode: gen = gen_aarch64_dup_lanev2di; break; + case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break; + case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break; + case V2DFmode: gen = gen_aarch64_dup_lanev2df; break; + default: + return false; + } + + emit_insn (gen (out, in0, lane)); + return true; +} + +static bool +aarch64_evpc_tbl (struct expand_vec_perm_d *d) +{ + rtx rperm[MAX_VECT_LEN], sel; + enum machine_mode vmode = d->vmode; + unsigned int i, nelt = d->nelt; + + /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + if (BYTES_BIG_ENDIAN) + return false; + + if (d->testing_p) + return true; + + /* Generic code will try constant permutation twice. Once with the + original mode and again with the elements lowered to QImode. + So wait and don't do the selector expansion ourselves. 
*/ + if (vmode != V8QImode && vmode != V16QImode) + return false; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (d->perm[i]); + sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + sel = force_reg (vmode, sel); + + aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); + return true; +} + +static bool +aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + /* The pattern matching functions above are written to look for a small + number to begin the sequence (0, 1, N/2). If we begin with an index + from the second operand, we can swap the operands. */ + if (d->perm[0] >= d->nelt) + { + unsigned i, nelt = d->nelt; + rtx x; + + for (i = 0; i < nelt; ++i) + d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + + x = d->op0; + d->op0 = d->op1; + d->op1 = x; + } + + if (TARGET_SIMD) + { + if (aarch64_evpc_zip (d)) + return true; + else if (aarch64_evpc_uzp (d)) + return true; + else if (aarch64_evpc_trn (d)) + return true; + else if (aarch64_evpc_dup (d)) + return true; + return aarch64_evpc_tbl (d); + } + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + + d.vmode = GET_MODE (target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable (); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (op0, op1)) + break; + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + /* Fall Through. */ + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = op0; + d.one_vector_p = true; + break; + } + + return aarch64_expand_vec_perm_const_1 (&d); +} + +static bool +aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Calculate whether all elements are in one vector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* If all elements are from the second vector, reindex as if from the + first vector. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to a single vector. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = aarch64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +/* Implement target hook CANNOT_CHANGE_MODE_CLASS. 
*/ +bool +aarch64_cannot_change_mode_class (enum machine_mode from, + enum machine_mode to, + enum reg_class rclass) +{ + /* Full-reg subregs are allowed on general regs or any class if they are + the same size. */ + if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) + || !reg_classes_intersect_p (FP_REGS, rclass)) + return false; + + /* Limited combinations of subregs are safe on FPREGs. Particularly, + 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. + 2. Scalar to Scalar for integer modes or same size float modes. + 3. Vector to Vector modes. */ + if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) + { + if (aarch64_vector_mode_supported_p (from) + && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to)) + return false; + + if (GET_MODE_NUNITS (from) == 1 + && GET_MODE_NUNITS (to) == 1 + && (GET_MODE_CLASS (from) == MODE_INT + || from == to)) + return false; + + if (aarch64_vector_mode_supported_p (from) + && aarch64_vector_mode_supported_p (to)) + return false; + } + + return true; +} + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST aarch64_address_cost + +/* This hook will determines whether unnamed bitfields affect the alignment + of the containing structure. The hook returns true if the structure + should inherit the alignment requirements of an unnamed bitfield's + type. */ +#undef TARGET_ALIGN_ANON_BITFIELD +#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true + +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START aarch64_start_file + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list + +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE aarch64_can_eliminate + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage + +/* Only the least significant bit is used for initialization guard + variables. 
*/ +#undef TARGET_CXX_GUARD_MASK_BIT +#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true + +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix + +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END) +#endif + +#undef TARGET_CLASS_MAX_NREGS +#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL aarch64_builtin_decl + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN aarch64_fold_builtin + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG aarch64_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance + +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE aarch64_function_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required + +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS aarch64_init_builtins + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p + +#undef TARGET_LIBGCC_CMP_RETURN_MODE +#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode + +#undef TARGET_LRA_P +#define TARGET_LRA_P aarch64_lra_p + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE aarch64_mangle_type + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +/* This target hook should return true if accesses to volatile bitfields + should use the narrowest mode possible. It should return false if these + accesses should use the bitfield container type. 
*/ +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE aarch64_override_options + +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ + aarch64_override_options_after_change + +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB aarch64_return_in_msb + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS aarch64_rtx_costs + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init + +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p + +#undef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p + +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + aarch64_builtin_vectorization_cost + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode + +#undef TARGET_VECTORIZE_BUILTINS +#define TARGET_VECTORIZE_BUILTINS + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + aarch64_builtin_vectorized_function + +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + aarch64_autovectorize_vector_sizes + +/* Section anchor support. */ + +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -256 + +/* Limit the maximum anchor offset to 4k-1, since that's the limit for a + byte offset; we can do much more for larger data types, but have no way + to determine the size of the access. We assume accesses are aligned. */ +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 4095 + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment + +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + aarch64_simd_vector_alignment_reachable + +/* vec_perm support. 
*/ + +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ + aarch64_vectorize_vec_perm_const_ok + + +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-aarch64.h" diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.h b/gcc-4.9/gcc/config/aarch64/aarch64.h new file mode 100644 index 000000000..7962aa472 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.h @@ -0,0 +1,873 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH64_H +#define GCC_AARCH64_H + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__aarch64__"); \ + if (TARGET_BIG_END) \ + builtin_define ("__AARCH64EB__"); \ + else \ + builtin_define ("__AARCH64EL__"); \ + \ + if (!TARGET_GENERAL_REGS_ONLY) \ + builtin_define ("__ARM_NEON"); \ + \ + switch (aarch64_cmodel) \ + { \ + case AARCH64_CMODEL_TINY: \ + case AARCH64_CMODEL_TINY_PIC: \ + builtin_define ("__AARCH64_CMODEL_TINY__"); \ + break; \ + case AARCH64_CMODEL_SMALL: \ + case AARCH64_CMODEL_SMALL_PIC: \ + builtin_define ("__AARCH64_CMODEL_SMALL__");\ + break; \ + case AARCH64_CMODEL_LARGE: \ + builtin_define ("__AARCH64_CMODEL_LARGE__"); \ + break; \ + default: \ + break; \ + } \ + \ + if (TARGET_ILP32) \ + { \ + cpp_define (parse_in, "_ILP32"); \ + cpp_define (parse_in, "__ILP32__"); \ + } \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ + } while (0) + + + +/* Target machine storage layout. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode || MODE == HImode) \ + { \ + MODE = SImode; \ + } \ + } + +/* Bits are always numbered from the LSBit. */ +#define BITS_BIG_ENDIAN 0 + +/* Big/little-endian flavour. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0) +#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN) + +/* AdvSIMD is supported in the default configuration, unless disabled by + -mgeneral-regs-only. */ +#define TARGET_SIMD !TARGET_GENERAL_REGS_ONLY +#define TARGET_FLOAT !TARGET_GENERAL_REGS_ONLY + +#define UNITS_PER_WORD 8 + +#define UNITS_PER_VREG 16 + +#define PARM_BOUNDARY 64 + +#define STACK_BOUNDARY 128 + +#define FUNCTION_BOUNDARY 32 + +#define EMPTY_FIELD_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 128 + +#define SHORT_TYPE_SIZE 16 + +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE (TARGET_ILP32 ? 32 : 64) + +#define POINTER_SIZE (TARGET_ILP32 ? 
32 : 64) + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* The architecture reserves all bits of the address for hardware use, + so the vbit must go into the delta field of pointers to member + functions. This is the same config as that in the AArch32 + port. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +/* Make strings word-aligned so that strcpy from constants will be + faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && !optimize_size \ + && (ALIGN) < BITS_PER_WORD) \ + ? BITS_PER_WORD : ALIGN) + +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ((((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) \ + ? BITS_PER_WORD : (ALIGN)) + +#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN) + +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* Defined by the ABI */ +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Using long long breaks -ansi and -std=c90, so these will need to be + made conditional for an LLP64 ABI. */ + +#define SIZE_TYPE "long unsigned int" + +#define PTRDIFF_TYPE "long int" + +#define PCC_BITFIELD_TYPE_MATTERS 1 + + +/* Instruction tuning/selection flags. */ + +/* Bit values used to identify processor capabilities. */ +#define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ +#define AARCH64_FL_FP (1 << 1) /* Has FP. */ +#define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ +#define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ + +/* Has FP and SIMD. */ +#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +/* Has FP without SIMD. */ +#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) + +/* Architecture flags that effect instruction selection. */ +#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) + +/* Macros to test ISA flags. */ +extern unsigned long aarch64_isa_flags; +#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) +#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) +#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) +#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + +/* Macros to test tuning flags. */ +extern unsigned long aarch64_tune_flags; +#define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL) + +/* Crypto is an optional feature. */ +#define TARGET_CRYPTO AARCH64_ISA_CRYPTO + +/* Standard register usage. */ + +/* 31 64-bit general purpose registers R0-R30: + R30 LR (link register) + R29 FP (frame pointer) + R19-R28 Callee-saved registers + R18 The platform register; use as temporary register. + R17 IP1 The second intra-procedure-call temporary register + (can be used by call veneers and PLT code); otherwise use + as a temporary register + R16 IP0 The first intra-procedure-call temporary register (can + be used by call veneers and PLT code); otherwise use as a + temporary register + R9-R15 Temporary registers + R8 Structure value parameter / temporary register + R0-R7 Parameter/result registers + + SP stack pointer, encoded as X/R31 where permitted. + ZR zero register, encoded as X/R31 elsewhere + + 32 x 128-bit floating-point/vector registers + V16-V31 Caller-saved (temporary) registers + V8-V15 Callee-saved registers + V0-V7 Parameter/result registers + + The vector register V0 holds scalar B0, H0, S0 and D0 in its least + significant bits. 
Unlike AArch32 S1 is not packed into D0, + etc. */ + +/* Note that we don't mark X30 as a call-clobbered register. The idea is + that it's really the call instructions themselves which clobber X30. + We don't care what the called function does with it afterwards. + + This approach makes it easier to implement sibcalls. Unlike normal + calls, sibcalls don't clobber X30, so the register reaches the + called function intact. EPILOGUE_USES says that X30 is useful + to the called function. */ + +#define FIXED_REGISTERS \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R8 - R15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 0, 1, 0, 1, /* R24 - R30, SP */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V0 - V7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V16 - V23 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V24 - V31 */ \ + 1, 1, 1, /* SFP, AP, CC */ \ + } + +#define CALL_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \ + 1, 1, 1, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 0, 1, 0, 1, /* R24 - R30, SP */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V0 - V7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V16 - V23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V24 - V31 */ \ + 1, 1, 1, /* SFP, AP, CC */ \ + } + +#define REGISTER_NAMES \ + { \ + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", \ + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", \ + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", \ + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", \ + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \ + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", \ + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \ + "sfp", "ap", "cc", \ + } + +/* Generate the register aliases for core register N */ +#define R_ALIASES(N) {"r" # N, R0_REGNUM + (N)}, \ + {"w" # N, R0_REGNUM + (N)} + +#define V_ALIASES(N) {"q" # N, V0_REGNUM + (N)}, \ + {"d" # N, V0_REGNUM + (N)}, \ + {"s" # N, V0_REGNUM + (N)}, \ + {"h" # N, V0_REGNUM + (N)}, \ + {"b" # N, V0_REGNUM + (N)} + +/* Provide aliases for all of the ISA defined register name forms. + These aliases are convenient for use in the clobber lists of inline + asm statements. */ + +#define ADDITIONAL_REGISTER_NAMES \ + { R_ALIASES(0), R_ALIASES(1), R_ALIASES(2), R_ALIASES(3), \ + R_ALIASES(4), R_ALIASES(5), R_ALIASES(6), R_ALIASES(7), \ + R_ALIASES(8), R_ALIASES(9), R_ALIASES(10), R_ALIASES(11), \ + R_ALIASES(12), R_ALIASES(13), R_ALIASES(14), R_ALIASES(15), \ + R_ALIASES(16), R_ALIASES(17), R_ALIASES(18), R_ALIASES(19), \ + R_ALIASES(20), R_ALIASES(21), R_ALIASES(22), R_ALIASES(23), \ + R_ALIASES(24), R_ALIASES(25), R_ALIASES(26), R_ALIASES(27), \ + R_ALIASES(28), R_ALIASES(29), R_ALIASES(30), {"wsp", R0_REGNUM + 31}, \ + V_ALIASES(0), V_ALIASES(1), V_ALIASES(2), V_ALIASES(3), \ + V_ALIASES(4), V_ALIASES(5), V_ALIASES(6), V_ALIASES(7), \ + V_ALIASES(8), V_ALIASES(9), V_ALIASES(10), V_ALIASES(11), \ + V_ALIASES(12), V_ALIASES(13), V_ALIASES(14), V_ALIASES(15), \ + V_ALIASES(16), V_ALIASES(17), V_ALIASES(18), V_ALIASES(19), \ + V_ALIASES(20), V_ALIASES(21), V_ALIASES(22), V_ALIASES(23), \ + V_ALIASES(24), V_ALIASES(25), V_ALIASES(26), V_ALIASES(27), \ + V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \ + } + +/* Say that the epilogue uses the return address register. 
Note that + in the case of sibcalls, the values "used by the epilogue" are + considered live at the start of the called function. */ + +#define EPILOGUE_USES(REGNO) \ + ((REGNO) == LR_REGNUM) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. */ +#define EXIT_IGNORE_STACK 1 + +#define STATIC_CHAIN_REGNUM R18_REGNUM +#define HARD_FRAME_POINTER_REGNUM R29_REGNUM +#define FRAME_POINTER_REGNUM SFP_REGNUM +#define STACK_POINTER_REGNUM SP_REGNUM +#define ARG_POINTER_REGNUM AP_REGNUM +#define FIRST_PSEUDO_REGISTER 67 + +/* The number of (integer) argument register available. */ +#define NUM_ARG_REGS 8 +#define NUM_FP_ARG_REGS 8 + +/* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most + four members. */ +#define HA_MAX_NUM_FLDS 4 + +/* External dwarf register number scheme. These number are used to + identify registers in dwarf debug information, the values are + defined by the AArch64 ABI. The numbering scheme is independent of + GCC's internal register numbering scheme. */ + +#define AARCH64_DWARF_R0 0 + +/* The number of R registers, note 31! not 32. */ +#define AARCH64_DWARF_NUMBER_R 31 + +#define AARCH64_DWARF_SP 31 +#define AARCH64_DWARF_V0 64 + +/* The number of V registers. */ +#define AARCH64_DWARF_NUMBER_V 32 + +/* For signal frames we need to use an alternative return column. This + value must not correspond to a hard register and must be out of the + range of DWARF_FRAME_REGNUM(). */ +#define DWARF_ALT_FRAME_RETURN_COLUMN \ + (AARCH64_DWARF_V0 + AARCH64_DWARF_NUMBER_V) + +/* We add 1 extra frame register for use as the + DWARF_ALT_FRAME_RETURN_COLUMN. */ +#define DWARF_FRAME_REGISTERS (DWARF_ALT_FRAME_RETURN_COLUMN + 1) + + +#define DBX_REGISTER_NUMBER(REGNO) aarch64_dbx_register_number (REGNO) +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REGNO) DBX_REGISTER_NUMBER (REGNO) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM) + +#define HARD_REGNO_NREGS(REGNO, MODE) aarch64_hard_regno_nregs (REGNO, MODE) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +#define DWARF2_UNWIND_INFO 1 + +/* Use R0 through R3 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM) + +/* Select a format to encode pointers in exception handling data. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + aarch64_asm_preferred_eh_data_format ((CODE), (GLOBAL)) + +/* The register that holds the return address in exception handlers. */ +#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM) + +/* Don't use __builtin_setjmp until we've defined it. */ +#undef DONT_USE_BUILTIN_SETJMP +#define DONT_USE_BUILTIN_SETJMP 1 + +/* Register in which the structure value is to be returned. */ +#define AARCH64_STRUCT_VALUE_REGNUM R8_REGNUM + +/* Non-zero if REGNO is part of the Core register set. + + The rather unusual way of expressing this check is to avoid + warnings when building the compiler when R0_REGNUM is 0 and REGNO + is unsigned. 
*/ +#define GP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM)) + +#define FP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) + +#define FP_LO_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM)) + + +/* Register and constant classes. */ + +enum reg_class +{ + NO_REGS, + CORE_REGS, + GENERAL_REGS, + STACK_REG, + POINTER_REGS, + FP_LO_REGS, + FP_REGS, + ALL_REGS, + LIM_REG_CLASSES /* Last */ +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "CORE_REGS", \ + "GENERAL_REGS", \ + "STACK_REG", \ + "POINTER_REGS", \ + "FP_LO_REGS", \ + "FP_REGS", \ + "ALL_REGS" \ +} + +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ + { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ + { 0x00000000, 0x0000ffff, 0x00000000 }, /* FP_LO_REGS */ \ + { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ + { 0xffffffff, 0xffffffff, 0x00000007 } /* ALL_REGS */ \ +} + +#define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) + +#define INDEX_REG_CLASS CORE_REGS +#define BASE_REG_CLASS POINTER_REGS + +/* Register pairs used to eliminate unneeded registers that point into + the stack frame. */ +#define ELIMINABLE_REGS \ +{ \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ +} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = aarch64_initial_elimination_offset (FROM, TO) + +/* CPU/ARCH option handling. */ +#include "config/aarch64/aarch64-opts.h" + +enum target_cpus +{ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + TARGET_CPU_##INTERNAL_IDENT, +#include "aarch64-cores.def" +#undef AARCH64_CORE + TARGET_CPU_generic +}; + +/* If there is no CPU defined at configure, use generic as default. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT \ + (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6)) +#endif + +/* The processor for which instructions should be scheduled. */ +extern enum aarch64_processor aarch64_tune; + +/* RTL generation support. */ +#define INIT_EXPANDERS aarch64_init_expanders () + + +/* Stack layout; function entry, exit and calling. */ +#define STACK_GROWS_DOWNWARD 1 + +#define FRAME_GROWS_DOWNWARD 1 + +#define STARTING_FRAME_OFFSET 0 + +#define ACCUMULATE_OUTGOING_ARGS 1 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Fix for VFP */ +#define LIBCALL_VALUE(MODE) \ + gen_rtx_REG (MODE, FLOAT_MODE_P (MODE) ? V0_REGNUM : R0_REGNUM) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define AARCH64_ROUND_UP(X, ALIGNMENT) \ + (((X) + ((ALIGNMENT) - 1)) & ~((ALIGNMENT) - 1)) + +#define AARCH64_ROUND_DOWN(X, ALIGNMENT) \ + ((X) & ~((ALIGNMENT) - 1)) + +#ifdef HOST_WIDE_INT +struct GTY (()) aarch64_frame +{ + HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; + HOST_WIDE_INT saved_regs_size; + /* Padding if needed after the all the callee save registers have + been saved. 
*/ + HOST_WIDE_INT padding0; + HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ + HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ + + bool laid_out; +}; + +typedef struct GTY (()) machine_function +{ + struct aarch64_frame frame; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the frame. */ + HOST_WIDE_INT saved_varargs_size; + +} machine_function; +#endif + +/* Which ABI to use. */ +enum aarch64_abi_type +{ + AARCH64_ABI_LP64 = 0, + AARCH64_ABI_ILP32 = 1 +}; + +#ifndef AARCH64_ABI_DEFAULT +#define AARCH64_ABI_DEFAULT AARCH64_ABI_LP64 +#endif + +#define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32) + +enum arm_pcs +{ + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ + ARM_PCS_UNKNOWN +}; + + +extern enum arm_pcs arm_pcs_variant; + +#ifndef ARM_DEFAULT_PCS +#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 +#endif + +/* We can't use enum machine_mode inside a generator file because it + hasn't been created yet; we shouldn't be using any code that + needs the real definition though, so this ought to be safe. */ +#ifdef GENERATOR_FILE +#define MACHMODE int +#else +#include "insn-modes.h" +#define MACHMODE enum machine_mode +#endif + + +/* AAPCS related state tracking. */ +typedef struct +{ + enum arm_pcs pcs_variant; + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_ncrn; /* Next Core register number. */ + int aapcs_nextncrn; /* Next next core register number. */ + int aapcs_nvrn; /* Next Vector register number. */ + int aapcs_nextnvrn; /* Next Next Vector register number. */ + rtx aapcs_reg; /* Register assigned to this argument. This + is NULL_RTX if this parameter goes on + the stack. */ + MACHMODE aapcs_vfp_rmode; + int aapcs_stack_words; /* If the argument is passed on the stack, this + is the number of words needed, after rounding + up. Only meaningful when + aapcs_reg == NULL_RTX. */ + int aapcs_stack_size; /* The total size (in words, per 8 byte) of the + stack arg area so far. */ +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + (aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (aarch64_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward) + +#define PAD_VARARGS_DOWN 0 + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + aarch64_init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) + +#define FUNCTION_ARG_REGNO_P(REGNO) \ + aarch64_function_arg_regno_p(REGNO) + + +/* ISA Features. */ + +/* Addressing modes, etc. */ +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_DISP 1 + +#define MAX_REGS_PER_ADDRESS 2 + +#define CONSTANT_ADDRESS_P(X) aarch64_constant_address_p(X) + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. 
*/ + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +do { \ + rtx new_x = aarch64_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \ + IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + aarch64_regno_ok_for_base_p (REGNO, true) + +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + aarch64_regno_ok_for_index_p (REGNO, true) + +#define LEGITIMATE_PIC_OPERAND_P(X) \ + aarch64_legitimate_pic_operand_p (X) + +#define CASE_VECTOR_MODE Pmode + +#define DEFAULT_SIGNED_CHAR 0 + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. We allow pairs of registers. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Maximum bytes moved by a single instruction (load/store pair). */ +#define MOVE_MAX (UNITS_PER_WORD * 2) + +/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ +#define AARCH64_CALL_RATIO 8 + +/* When optimizing for size, give a better estimate of the length of a memcpy + call, but use the default otherwise. But move_by_pieces_ninsns() counts + memory-to-memory moves, and we'll have to generate a load & store for each, + so halve the value to take that into account. */ +#define MOVE_RATIO(speed) \ + (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) + +/* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ +#define CLEAR_RATIO(speed) \ + ((speed) ? 15 : AARCH64_CALL_RATIO) + +/* SET_RATIO is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of loading + the constant. */ +#define SET_RATIO(speed) \ + ((speed) ? 15 : AARCH64_CALL_RATIO - 2) + +/* STORE_BY_PIECES_P can be used when copying a constant string, but + in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). + For now we always fail this and let the move_by_pieces code copy + the string from read-only memory. */ +#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 + +/* Disable auto-increment in move_by_pieces et al. Use of auto-increment is + rarely a good idea in straight-line code since it adds an extra address + dependency between each instruction. Better to use incrementing offsets. */ +#define USE_LOAD_POST_INCREMENT(MODE) 0 +#define USE_LOAD_POST_DECREMENT(MODE) 0 +#define USE_LOAD_PRE_INCREMENT(MODE) 0 +#define USE_LOAD_PRE_DECREMENT(MODE) 0 +#define USE_STORE_POST_INCREMENT(MODE) 0 +#define USE_STORE_POST_DECREMENT(MODE) 0 +#define USE_STORE_PRE_INCREMENT(MODE) 0 +#define USE_STORE_PRE_DECREMENT(MODE) 0 + +/* ?? #define WORD_REGISTER_OPERATIONS */ + +/* Define if loading from memory in MODE, an integral mode narrower than + BITS_PER_WORD will either zero-extend or sign-extend. The value of this + macro should be the code that says which one of the two operations is + implicitly done, or UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Define this macro to be non-zero if instructions will fail to work + if given data not on the nominal alignment. */ +#define STRICT_ALIGNMENT TARGET_STRICT_ALIGN + +/* Define this macro to be non-zero if accessing less than a word of + memory is no faster than accessing a word of memory, i.e., if such + accesses require more than one instruction or if there is no + difference in cost. 
+ Although there's no difference in instruction count or cycles, + in AArch64 we don't want to expand to a sub-word to a 64-bit access + if we don't have to, for power-saving reasons. */ +#define SLOW_BYTE_ACCESS 0 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define NO_FUNCTION_CSE 1 + +/* Specify the machine mode that the hardware addresses have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode DImode + +/* A C expression whose value is zero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and + greater then zero if they are zero-extended and less then zero if the + ptr_extend instruction should be used. */ +#define POINTERS_EXTEND_UNSIGNED 1 + +/* Mode of a function address in a call instruction (for indexing purposes). */ +#define FUNCTION_MODE Pmode + +#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y) + +#define REVERSIBLE_CC_MODE(MODE) 1 + +#define REVERSE_CONDITION(CODE, MODE) \ + (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ + ? reverse_condition_maybe_unordered (CODE) \ + : reverse_condition (CODE)) + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = ((MODE) == SImode ? 32 : 64), 2) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) + +#define RETURN_ADDR_RTX aarch64_return_addr + +/* 3 insns + padding + 2 pointer-sized entries. */ +#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32) + +/* Trampolines contain dwords, so must be dword aligned. */ +#define TRAMPOLINE_ALIGNMENT 64 + +/* Put trampolines in the text section so that mapping symbols work + correctly. */ +#define TRAMPOLINE_SECTION text_section + +/* To start with. */ +#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 + + +/* Assembly output. */ + +/* For now we'll make all jump tables pc-relative. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + ((min < -0x1fff0 || max > 0x1fff0) ? SImode \ + : (min < -0x1f0 || max > 0x1f0) ? HImode \ + : QImode) + +/* Jump table alignment is explicit in ASM_OUTPUT_CASE_LABEL. */ +#define ADDR_VEC_ALIGN(JUMPTABLE) 0 + +#define PRINT_OPERAND(STREAM, X, CODE) aarch64_print_operand (STREAM, X, CODE) + +#define PRINT_OPERAND_ADDRESS(STREAM, X) \ + aarch64_print_operand_address (STREAM, X) + +#define MCOUNT_NAME "_mcount" + +#define NO_PROFILE_COUNTERS 1 + +/* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +#define PROFILE_HOOK(LABEL) \ + { \ + rtx fun, lr; \ + lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ + fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ + emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ + } + +/* All the work done in PROFILE_HOOK, but still required. */ +#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + +/* For some reason, the Linux headers think they know how to define + these macros. They don't!!! */ +#undef ASM_APP_ON +#undef ASM_APP_OFF +#define ASM_APP_ON "\t" ASM_COMMENT_START " Start of user assembly\n" +#define ASM_APP_OFF "\t" ASM_COMMENT_START " End of user assembly\n" + +#define CONSTANT_POOL_BEFORE_FUNCTION 0 + +/* This definition should be relocated to aarch64-elf-raw.h. 
This macro + should be undefined in aarch64-linux.h and a clear_cache pattern + implmented to emit either the call to __aarch64_sync_cache_range() + directly or preferably the appropriate sycall or cache clear + instructions inline. */ +#define CLEAR_INSN_CACHE(beg, end) \ + extern void __aarch64_sync_cache_range (void *, void *); \ + __aarch64_sync_cache_range (beg, end) + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + aarch64_cannot_change_mode_class (FROM, TO, CLASS) + +#define SHIFT_COUNT_TRUNCATED !TARGET_SIMD + +/* Callee only saves lower 64-bits of a 128-bit register. Tell the + compiler the callee clobbers the top 64-bits when restoring the + bottom 64-bits. */ +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ + (FP_REGNUM_P (REGNO) && GET_MODE_SIZE (MODE) > 8) + +/* Check TLS Descriptors mechanism is selected. */ +#define TARGET_TLS_DESC (aarch64_tls_dialect == TLS_DESCRIPTORS) + +extern enum aarch64_code_model aarch64_cmodel; + +/* When using the tiny addressing model conditional and unconditional branches + can span the whole of the available address space (1MB). */ +#define HAS_LONG_COND_BRANCH \ + (aarch64_cmodel == AARCH64_CMODEL_TINY \ + || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) + +#define HAS_LONG_UNCOND_BRANCH \ + (aarch64_cmodel == AARCH64_CMODEL_TINY \ + || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) + +/* Modes valid for AdvSIMD Q registers. */ +#define AARCH64_VALID_SIMD_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V2DImode || mode == V2DFmode) + +#define ENDIAN_LANE_N(mode, n) \ + (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n) + +#define BIG_LITTLE_SPEC \ + " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}" + +extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); +#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", aarch64_rewrite_mcpu }, + +#define ASM_CPU_SPEC \ + BIG_LITTLE_SPEC + +#define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS + +#define EXTRA_SPECS \ + { "asm_cpu_spec", ASM_CPU_SPEC } + +#endif /* GCC_AARCH64_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md new file mode 100644 index 000000000..c86a29d8e --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -0,0 +1,3642 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; Register numbers +(define_constants + [ + (R0_REGNUM 0) + (R1_REGNUM 1) + (R2_REGNUM 2) + (R3_REGNUM 3) + (R4_REGNUM 4) + (R5_REGNUM 5) + (R6_REGNUM 6) + (R7_REGNUM 7) + (R8_REGNUM 8) + (R9_REGNUM 9) + (R10_REGNUM 10) + (R11_REGNUM 11) + (R12_REGNUM 12) + (R13_REGNUM 13) + (R14_REGNUM 14) + (R15_REGNUM 15) + (R16_REGNUM 16) + (IP0_REGNUM 16) + (R17_REGNUM 17) + (IP1_REGNUM 17) + (R18_REGNUM 18) + (R19_REGNUM 19) + (R20_REGNUM 20) + (R21_REGNUM 21) + (R22_REGNUM 22) + (R23_REGNUM 23) + (R24_REGNUM 24) + (R25_REGNUM 25) + (R26_REGNUM 26) + (R27_REGNUM 27) + (R28_REGNUM 28) + (R29_REGNUM 29) + (R30_REGNUM 30) + (LR_REGNUM 30) + (SP_REGNUM 31) + (V0_REGNUM 32) + (V15_REGNUM 47) + (V31_REGNUM 63) + (SFP_REGNUM 64) + (AP_REGNUM 65) + (CC_REGNUM 66) + ] +) + +(define_c_enum "unspec" [ + UNSPEC_CASESI + UNSPEC_CLS + UNSPEC_FRECPE + UNSPEC_FRECPS + UNSPEC_FRECPX + UNSPEC_FRINTA + UNSPEC_FRINTI + UNSPEC_FRINTM + UNSPEC_FRINTN + UNSPEC_FRINTP + UNSPEC_FRINTX + UNSPEC_FRINTZ + UNSPEC_GOTSMALLPIC + UNSPEC_GOTSMALLTLS + UNSPEC_GOTTINYPIC + UNSPEC_LD1 + UNSPEC_LD2 + UNSPEC_LD3 + UNSPEC_LD4 + UNSPEC_MB + UNSPEC_NOP + UNSPEC_PRLG_STK + UNSPEC_RBIT + UNSPEC_SISD_NEG + UNSPEC_SISD_SSHL + UNSPEC_SISD_USHL + UNSPEC_SSHL_2S + UNSPEC_SSHR64 + UNSPEC_ST1 + UNSPEC_ST2 + UNSPEC_ST3 + UNSPEC_ST4 + UNSPEC_TLS + UNSPEC_TLSDESC + UNSPEC_USHL_2S + UNSPEC_USHR64 + UNSPEC_VSTRUCTDUMMY +]) + +(define_c_enum "unspecv" [ + UNSPECV_EH_RETURN ; Represent EH_RETURN + ] +) + +;; If further include files are added the defintion of MD_INCLUDES +;; must be updated. + +(include "constraints.md") +(include "predicates.md") +(include "iterators.md") + +;; ------------------------------------------------------------------- +;; Instruction types and attributes +;; ------------------------------------------------------------------- + +; The "type" attribute is is included here from AArch32 backend to be able +; to share pipeline descriptions. +(include "../arm/types.md") + +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr "fp" "no,yes" (const_string "no")) + +;; Attribute that specifies whether or not the instruction touches simd +;; registers. +(define_attr "simd" "no,yes" (const_string "no")) + +(define_attr "length" "" + (const_int 4)) + +;; Attribute that controls whether an alternative is enabled or not. +;; Currently it is only used to disable alternatives which touch fp or simd +;; registers when -mgeneral-regs-only is specified. +(define_attr "enabled" "no,yes" + (cond [(ior + (and (eq_attr "fp" "yes") + (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) + (and (eq_attr "simd" "yes") + (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) + (const_string "no") + ] (const_string "yes"))) + +;; ------------------------------------------------------------------- +;; Pipeline descriptions and scheduling +;; ------------------------------------------------------------------- + +;; Processor types. +(include "aarch64-tune.md") + +;; True if the generic scheduling description should be used. 
+ +(define_attr "generic_sched" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexa53,cortexa15") + (const_string "no") + (const_string "yes")))) + +;; Scheduling +(include "../arm/cortex-a53.md") +(include "../arm/cortex-a15.md") + +;; ------------------------------------------------------------------- +;; Jumps and other miscellaneous insns +;; ------------------------------------------------------------------- + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "br\\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b\\t%l0" + [(set_attr "type" "branch")] +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPI 1 "register_operand" "") + (match_operand:GPI 2 "aarch64_plus_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF 1 "register_operand" "") + (match_operand:GPF 2 "aarch64_reg_or_zero" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " +) + +(define_insn "*condjump" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "b%m0\\t%l2" + [(set_attr "type" "branch")] +) + +(define_expand "casesi" + [(match_operand:SI 0 "register_operand" "") ; Index + (match_operand:SI 1 "const_int_operand" "") ; Lower bound + (match_operand:SI 2 "const_int_operand" "") ; Total range + (match_operand:DI 3 "" "") ; Table label + (match_operand:DI 4 "" "")] ; Out of range label + "" + { + if (operands[1] != const0_rtx) + { + rtx reg = gen_reg_rtx (SImode); + + /* Canonical RTL says that if you have: + + (minus (X) (CONST)) + + then this should be emitted as: + + (plus (X) (-CONST)) + + The use of trunc_int_for_mode ensures that the resulting + constant can be represented in SImode, this is important + for the corner case where operand[1] is INT_MIN. 
*/ + + operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]), SImode)); + + if (!(*insn_data[CODE_FOR_addsi3].operand[2].predicate) + (operands[1], SImode)) + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_addsi3 (reg, operands[0], operands[1])); + operands[0] = reg; + } + + if (!aarch64_plus_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + emit_jump_insn (gen_cbranchsi4 (gen_rtx_GTU (SImode, const0_rtx, + const0_rtx), + operands[0], operands[2], operands[4])); + + operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (VOIDmode, operands[3])); + emit_jump_insn (gen_casesi_dispatch (operands[2], operands[0], + operands[3])); + DONE; + } +) + +(define_insn "casesi_dispatch" + [(parallel + [(set (pc) + (mem:DI (unspec [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_CASESI))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:DI 4 "=r")) + (use (label_ref (match_operand 2 "" "")))])] + "" + "* + return aarch64_output_casesi (operands); + " + [(set_attr "length" "16") + (set_attr "type" "branch")] +) + +(define_insn "nop" + [(unspec[(const_int 0)] UNSPEC_NOP)] + "" + "nop" + [(set_attr "type" "no_insn")] +) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 8))] + "" + "brk #1000" + [(set_attr "type" "trap")]) + +(define_expand "prologue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_prologue (); + DONE; + " +) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_epilogue (false); + DONE; + " +) + +(define_expand "sibcall_epilogue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_epilogue (true); + DONE; + " +) + +(define_insn "*do_return" + [(return)] + "" + "ret" + [(set_attr "type" "branch")] +) + +(define_insn "eh_return" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")] + UNSPECV_EH_RETURN)] + "" + "#" + [(set_attr "type" "branch")] + +) + +(define_split + [(unspec_volatile [(match_operand:DI 0 "register_operand" "")] + UNSPECV_EH_RETURN)] + "reload_completed" + [(set (match_dup 1) (match_dup 0))] + { + operands[1] = aarch64_final_eh_return_addr (); + } +) + +(define_insn "*cb1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "\\t%0, %l1" + [(set_attr "type" "branch")] + +) + +(define_insn "*tb1" + [(set (pc) (if_then_else + (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (clobber (match_scratch:DI 3 "=r"))] + "" + "* + if (get_attr_length (insn) == 8) + return \"ubfx\\t%3, %0, %1, #1\;\\t%3, %l2\"; + return \"\\t%0, %1, %l2\"; + " + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) + (lt (minus (match_dup 2) (pc)) (const_int 32764))) + (const_int 4) + (const_int 8)))] +) + +(define_insn "*cb1" + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (match_scratch:DI 2 "=r"))] + "" + "* + if (get_attr_length (insn) == 8) + return \"ubfx\\t%2, %0, , #1\;\\t%2, %l1\"; + return \"\\t%0, , %l1\"; + " + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) + (lt (minus (match_dup 1) (pc)) 
(const_int 32764))) + (const_int 4) + (const_int 8)))] +) + +;; ------------------------------------------------------------------- +;; Subroutine calls and sibcalls +;; ------------------------------------------------------------------- + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))])] + "" + " + { + rtx callee; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[2] == NULL) + operands[2] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 64-bit address of the callee into a register before performing + the branch-and-link. */ + callee = XEXP (operands[0], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? aarch64_is_long_call_p (callee) + : !REG_P (callee)) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*call_reg" + [(call (mem:DI (match_operand:DI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))] + "" + "blr\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_symbol" + [(call (mem:DI (match_operand:DI 0 "" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))] + "GET_CODE (operands[0]) == SYMBOL_REF + && !aarch64_is_long_call_p (operands[0])" + "bl\\t%a0" + [(set_attr "type" "call")] +) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))])] + "" + " + { + rtx callee; + + /* In an untyped call, we can get NULL for operand 3. */ + if (operands[3] == NULL) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 64-bit address of the callee into a register before performing + the branch-and-link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? 
aarch64_is_long_call_p (callee) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*call_value_reg" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))] + "" + "blr\\t%1" + [(set_attr "type" "call")] + +) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))] + "GET_CODE (operands[1]) == SYMBOL_REF + && !aarch64_is_long_call_p (operands[1])" + "bl\\t%a1" + [(set_attr "type" "call")] +) + +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])] + "" + { + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + } +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])] + "" + { + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + } +) + +(define_insn "*sibcall_insn" + [(call (mem:DI (match_operand:DI 0 "" "X")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] + "GET_CODE (operands[0]) == SYMBOL_REF" + "b\\t%a0" + [(set_attr "type" "branch")] + +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "X")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] + "GET_CODE (operands[1]) == SYMBOL_REF" + "b\\t%a1" + [(set_attr "type" "branch")] +) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) + (match_operand 1 "") + (match_operand 2 "")])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. 
*/ + emit_insn (gen_blockage ()); + DONE; +}) + +;; ------------------------------------------------------------------- +;; Moves +;; ------------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "") + (match_operand:SHORT 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_insn "*mov_aarch64" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w") + (match_operand:SHORT 1 "general_operand" " r,M,D,m, m,rZ,*w,*w, r,*w"))] + "(register_operand (operands[0], mode) + || aarch64_reg_or_zero (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + mode); + case 3: + return "ldr\t%w0, %1"; + case 4: + return "ldr\t%0, %1"; + case 5: + return "str\t%w1, %0"; + case 6: + return "str\t%1, %0"; + case 7: + return "umov\t%w0, %1.[0]"; + case 8: + return "dup\t%0., %w1"; + case 9: + return "dup\t%0, %1.[0]"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ + neon_from_gp,neon_from_gp, neon_dup") + (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")] +) + +(define_expand "mov" + [(set (match_operand:GPI 0 "nonimmediate_operand" "") + (match_operand:GPI 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + + if (CONSTANT_P (operands[1])) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } + " +) + +(define_insn "*movsi_aarch64" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] + "(register_operand (operands[0], SImode) + || aarch64_reg_or_zero (operands[1], SImode))" + "@ + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %1 + ldr\\t%w0, %1 + ldr\\t%s0, %1 + str\\t%w1, %0 + str\\t%s1, %0 + adr\\t%x0, %a1 + adrp\\t%x0, %A1 + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov") + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] +) + +(define_insn "*movdi_aarch64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] + "(register_operand (operands[0], DImode) + || aarch64_reg_or_zero (operands[1], DImode))" + "@ + mov\\t%x0, %x1 + mov\\t%0, %x1 + mov\\t%x0, %1 + mov\\t%x0, %1 + ldr\\t%x0, %1 + ldr\\t%d0, %1 + str\\t%x1, %0 + str\\t%d1, %0 + adr\\t%x0, %a1 + adrp\\t%x0, %A1 + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 + fmov\\t%d0, %d1 + movi\\t%d0, %1" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov,fmov") + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] +) + +(define_insn "insv_imm" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (const_int 16) + (match_operand:GPI 1 "const_int_operand" "n")) + (match_operand:GPI 2 "const_int_operand" "n"))] + "UINTVAL (operands[1]) < GET_MODE_BITSIZE (mode) + && UINTVAL (operands[1]) % 16 == 0" + "movk\\t%0, %X2, lsl %1" + 
[(set_attr "type" "mov_imm")] +) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (TImode, operands[1]); + " +) + +(define_insn "*movti_aarch64" + [(set (match_operand:TI 0 + "nonimmediate_operand" "=r, *w,r ,*w,r ,Ump,Ump,*w,m") + (match_operand:TI 1 + "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r ,Z , m,*w"))] + "(register_operand (operands[0], TImode) + || aarch64_reg_or_zero (operands[1], TImode))" + "@ + # + # + # + orr\\t%0.16b, %1.16b, %1.16b + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" + [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + load2,store2,store2,f_loadd,f_stored") + (set_attr "length" "8,8,8,4,4,4,4,4,4") + (set_attr "simd" "*,*,*,yes,*,*,*,*,*") + (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] +) + +;; Split a TImode register-register or register-immediate move into +;; its component DImode pieces, taking care to handle overlapping +;; source and dest registers. +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "aarch64_reg_or_imm" ""))] + "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" + [(const_int 0)] +{ + aarch64_split_128bit_move (operands[0], operands[1]); + DONE; +}) + +(define_expand "mov" + [(set (match_operand:GPF 0 "nonimmediate_operand" "") + (match_operand:GPF 1 "general_operand" ""))] + "" + " + if (!TARGET_FLOAT) + { + sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); + FAIL; + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_insn "*movsf_aarch64" + [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1 + fmov\\t%s0, %1 + ldr\\t%s0, %1 + str\\t%s1, %0 + ldr\\t%w0, %1 + str\\t%w1, %0 + mov\\t%w0, %w1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ + f_loads,f_stores,f_loads,f_stores,fmov")] +) + +(define_insn "*movdf_aarch64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "@ + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 + fmov\\t%d0, %d1 + fmov\\t%d0, %1 + ldr\\t%d0, %1 + str\\t%d1, %0 + ldr\\t%x0, %1 + str\\t%x1, %0 + mov\\t%x0, %x1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ + f_loadd,f_stored,f_loadd,f_stored,mov_reg")] +) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" + " + if (!TARGET_FLOAT) + { + sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); + FAIL; + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (TFmode, operands[1]); + " +) + +(define_insn "*movtf_aarch64" + [(set (match_operand:TF 0 + "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump") + (match_operand:TF 1 + "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?rY"))] + "TARGET_FLOAT && (register_operand (operands[0], TFmode) + || register_operand (operands[1], TFmode))" + "@ + orr\\t%0.16b, %1.16b, %1.16b + # + # + # + movi\\t%0.2d, #0 + fmov\\t%s0, wzr + ldr\\t%q0, %1 
+ str\\t%q1, %0 + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0" + [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ + f_loadd,f_stored,neon_load1_2reg,neon_store1_2reg") + (set_attr "length" "4,8,8,8,4,4,4,4,4,4") + (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") + (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] +) + +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" + [(const_int 0)] + { + aarch64_split_128bit_move (operands[0], operands[1]); + DONE; + } +) + +;; Operands 1 and 3 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "load_pair" + [(set (match_operand:GPI 0 "register_operand" "=r") + (match_operand:GPI 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:GPI 2 "register_operand" "=r") + (match_operand:GPI 3 "memory_operand" "m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" + "ldp\\t%0, %2, %1" + [(set_attr "type" "load2")] +) + +;; Operands 0 and 2 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "store_pair" + [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:GPI 1 "register_operand" "r")) + (set (match_operand:GPI 2 "memory_operand" "=m") + (match_operand:GPI 3 "register_operand" "r"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" + "stp\\t%1, %3, %0" + [(set_attr "type" "store2")] +) + +;; Operands 1 and 3 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "load_pair" + [(set (match_operand:GPF 0 "register_operand" "=w") + (match_operand:GPF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:GPF 2 "register_operand" "=w") + (match_operand:GPF 3 "memory_operand" "m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" + "ldp\\t%0, %2, %1" + [(set_attr "type" "neon_load1_2reg")] +) + +;; Operands 0 and 2 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "store_pair" + [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:GPF 1 "register_operand" "w")) + (set (match_operand:GPF 2 "memory_operand" "=m") + (match_operand:GPF 3 "register_operand" "w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" + "stp\\t%1, %3, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +;; Load pair with writeback. This is primarily used in function epilogues +;; when restoring [fp,lr] +(define_insn "loadwb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (match_operand:GPI 2 "register_operand" "=r") + (mem:GPI (plus:P (match_dup 1) + (match_dup 4)))) + (set (match_operand:GPI 3 "register_operand" "=r") + (mem:GPI (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "ldp\\t%2, %3, [%1], %4" + [(set_attr "type" "load2")] +) + +;; Store pair with writeback. 
This is primarily used in function prologues +;; when saving [fp,lr] +(define_insn "storewb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=&k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (mem:GPI (plus:P (match_dup 0) + (match_dup 4))) + (match_operand:GPI 2 "register_operand" "r")) + (set (mem:GPI (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:GPI 3 "register_operand" "r"))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "stp\\t%2, %3, [%0, %4]!" + [(set_attr "type" "store2")] +) + +;; ------------------------------------------------------------------- +;; Sign/Zero extension +;; ------------------------------------------------------------------- + +(define_expand "sidi2" + [(set (match_operand:DI 0 "register_operand") + (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*extendsidi2_aarch64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + sxtw\t%0, %w1 + ldrsw\t%0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_insn "*zero_extendsidi2_aarch64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + uxtw\t%0, %w1 + ldr\t%w0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_expand "2" + [(set (match_operand:GPI 0 "register_operand") + (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*extend2_aarch64" + [(set (match_operand:GPI 0 "register_operand" "=r,r") + (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] + "" + "@ + sxt\t%0, %w1 + ldrs\t%0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_insn "*zero_extend2_aarch64" + [(set (match_operand:GPI 0 "register_operand" "=r,r,*w") + (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + uxt\t%0, %w1 + ldr\t%w0, %1 + ldr\t%0, %1" + [(set_attr "type" "extend,load1,load1")] +) + +(define_expand "qihi2" + [(set (match_operand:HI 0 "register_operand") + (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*qihi2_aarch64" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + xtb\t%w0, %w1 + b\t%w0, %1" + [(set_attr "type" "extend,load1")] +) + +;; ------------------------------------------------------------------- +;; Simple arithmetic +;; ------------------------------------------------------------------- + +(define_expand "add3" + [(set + (match_operand:GPI 0 "register_operand" "") + (plus:GPI (match_operand:GPI 1 "register_operand" "") + (match_operand:GPI 2 "aarch64_pluslong_operand" "")))] + "" + " + if (! aarch64_plus_operand (operands[2], VOIDmode)) + { + rtx subtarget = ((optimize && can_create_pseudo_p ()) + ? 
gen_reg_rtx (mode) : operands[0]); + HOST_WIDE_INT imm = INTVAL (operands[2]); + + if (imm < 0) + imm = -(-imm & ~0xfff); + else + imm &= ~0xfff; + + emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); + operands[1] = subtarget; + operands[2] = GEN_INT (INTVAL (operands[2]) - imm); + } + " +) + +(define_insn "*addsi3_aarch64" + [(set + (match_operand:SI 0 "register_operand" "=rk,rk,rk") + (plus:SI + (match_operand:SI 1 "register_operand" "%rk,rk,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))] + "" + "@ + add\\t%w0, %w1, %2 + add\\t%w0, %w1, %w2 + sub\\t%w0, %w1, #%n2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm")] +) + +;; zero_extend version of above +(define_insn "*addsi3_aarch64_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=rk,rk,rk") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))] + "" + "@ + add\\t%w0, %w1, %2 + add\\t%w0, %w1, %w2 + sub\\t%w0, %w1, #%n2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm")] +) + +(define_insn "*adddi3_aarch64" + [(set + (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w") + (plus:DI + (match_operand:DI 1 "register_operand" "%rk,rk,rk,!w") + (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,!w")))] + "" + "@ + add\\t%x0, %x1, %2 + add\\t%x0, %x1, %x2 + sub\\t%x0, %x1, #%n2 + add\\t%d0, %d1, %d2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") + (set_attr "simd" "*,*,*,yes")] +) + +(define_insn "*add3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%0, %1, %2 + adds\\t%0, %1, %2 + subs\\t%0, %1, #%n2" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +;; zero_extend version of above +(define_insn "*addsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "" + "@ + adds\\t%w0, %w1, %w2 + adds\\t%w0, %w1, %w2 + subs\\t%w0, %w1, #%n2" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*adds_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n")) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "" + "adds\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*subs_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (mult:GPI + (match_operand:GPI 2 "register_operand" "r") + (match_operand:QI 3 "aarch64_pwr_2_" "n"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) + (mult:GPI (match_dup 2) (match_dup 3))))] + "" + "subs\\t%0, %1, %2, lsl %p3" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*adds__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI + (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) 
+ (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] + "" + "adds\\t%0, %2, %1, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*subs__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))] + "" + "subs\\t%0, %1, %2, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*adds__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:GPI 4 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + (const_int 0)) + (match_dup 4)))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "adds\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*subs__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI + (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + (const_int 0))))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "subs\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*add3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) + (const_int 0)))] + "" + "@ + cmn\\t%0, %1 + cmn\\t%0, %1 + cmp\\t%0, #%n1" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*compare_neg" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:GPI (match_operand:GPI 0 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "cmn\\t%1, %0" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*add__" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*add__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*add_mul_imm_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*add__" + [(set (match_operand:GPI 0 "register_operand" "=rk") + 
(plus:GPI (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "add\\t%0, %2, %1, xt" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r"))))] + "" + "add\\t%w0, %w2, %w1, xt" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__shft_" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "aarch64_imm3" "Ui3")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, xt %2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 "register_operand" "r")) + (match_operand 2 "aarch64_imm3" "Ui3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, xt %2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__mult_" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (mult:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, xt %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__mult_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 "register_operand" "r")) + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, xt %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:GPI 4 "register_operand" "r")))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "add\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add_si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "add\\t%w0, %w4, %w1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add3_carryin" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (plus:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r"))))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 
"register_operand" "r")))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt1" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt1_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt2" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt2_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt3" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt3_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (and:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")) + (match_operand:GPI 4 "register_operand" "r")))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"add\t%0, %4, %1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=rk") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 
"register_operand" "r")))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "type" "alu_reg")] +) + +;; zero_extend version of above +(define_insn "*subsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "type" "alu_reg")] +) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=rk,!w") + (minus:DI (match_operand:DI 1 "register_operand" "r,!w") + (match_operand:DI 2 "register_operand" "r,!w")))] + "" + "@ + sub\\t%x0, %x1, %x2 + sub\\t%d0, %d1, %d2" + [(set_attr "type" "alu_reg, neon_sub") + (set_attr "simd" "*,yes")] +) + + +(define_insn "*sub3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "subs\\t%0, %1, %2" + [(set_attr "type" "alus_reg")] +) + +;; zero_extend version of above +(define_insn "*subsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] + "" + "subs\\t%w0, %w1, %w2" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*sub__" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "sub\\t%0, %3, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*sub__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*sub_mul_imm_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n"))))] + "" + "sub\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*sub_mul_imm_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*sub__" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))))] + "" + "sub\\t%0, %1, %2, xt" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")))))] + "" + "sub\\t%w0, %w1, %w2, xt" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub__shft_" + 
[(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3"))))] + "" + "sub\\t%0, %1, %2, xt %3" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub__shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3")))))] + "" + "sub\\t%w0, %w1, %w2, xt %3" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub__multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0))))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "sub\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub_si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 4 "register_operand" "r") + (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "sub\\t%w0, %w4, %w1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub3_carryin" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (minus:GPI + (match_operand:GPI 1 "register_operand" "r") + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "sbc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of the above +(define_insn "*subsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (minus:SI + (match_operand:SI 1 "register_operand" "r") + (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sbc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (and:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n"))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"sub\t%0, %4, %1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 4 "register_operand" "r") + (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" 
+ [(set_attr "type" "alu_ext")] +) + +(define_insn_and_split "absdi2" + [(set (match_operand:DI 0 "register_operand" "=r,w") + (abs:DI (match_operand:DI 1 "register_operand" "r,w"))) + (clobber (match_scratch:DI 2 "=&r,X"))] + "" + "@ + # + abs\\t%d0, %d1" + "reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + emit_insn (gen_rtx_SET (VOIDmode, operands[2], + gen_rtx_XOR (DImode, + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (DImode, + operands[2], + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63))))); + DONE; + } + [(set_attr "type" "alu_reg")] +) + +(define_insn "neg2" + [(set (match_operand:GPI 0 "register_operand" "=r,w") + (neg:GPI (match_operand:GPI 1 "register_operand" "r,w")))] + "" + "@ + neg\\t%0, %1 + neg\\t%0, %1" + [(set_attr "type" "alu_reg, neon_neg") + (set_attr "simd" "*,yes")] +) + +;; zero_extend version of above +(define_insn "*negsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "neg\\t%w0, %w1" + [(set_attr "type" "alu_reg")] +) + +(define_insn "*ngc" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "ngc\\t%0, %1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*ngcsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "ngc\\t%w0, %w1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*neg2_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_dup 1)))] + "" + "negs\\t%0, %1" + [(set_attr "type" "alus_reg")] +) + +;; zero_extend version of above +(define_insn "*negsi2_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_dup 1))))] + "" + "negs\\t%w0, %w1" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*neg_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (neg:GPI (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))] + "" + "negs\\t%0, %1, %2" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*neg__2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "neg\\t%0, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*neg__si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "neg\\t%w0, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*neg_mul_imm_2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + 
(match_operand:QI 2 "aarch64_pwr_2_" "n"))))] + "" + "neg\\t%0, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*neg_mul_imm_si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "neg\\t%w0, %w1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "mul3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")))] + "" + "mul\\t%0, %1, %2" + [(set_attr "type" "mul")] +) + +;; zero_extend version of above +(define_insn "*mulsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "mul\\t%w0, %w1, %w2" + [(set_attr "type" "mul")] +) + +(define_insn "*madd" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "madd\\t%0, %1, %2, %3" + [(set_attr "type" "mla")] +) + +;; zero_extend version of above +(define_insn "*maddsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "madd\\t%w0, %w1, %w2, %w3" + [(set_attr "type" "mla")] +) + +(define_insn "*msub" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r"))))] + + "" + "msub\\t%0, %1, %2, %3" + [(set_attr "type" "mla")] +) + +;; zero_extend version of above +(define_insn "*msubsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))))] + + "" + "msub\\t%w0, %w1, %w2, %w3" + [(set_attr "type" "mla")] +) + +(define_insn "*mul_neg" + [(set (match_operand:GPI 0 "register_operand" "=r") + (mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + + "" + "mneg\\t%0, %1, %2" + [(set_attr "type" "mul")] +) + +;; zero_extend version of above +(define_insn "*mulsi_neg_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + + "" + "mneg\\t%w0, %w1, %w2" + [(set_attr "type" "mul")] +) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))] + "" + "mull\\t%0, %w1, %w2" + [(set_attr "type" "mull")] +) + +(define_insn "maddsidi4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))) + (match_operand:DI 3 "register_operand" "r")))] + "" + "maddl\\t%0, %w1, %w2, %3" + [(set_attr "type" "mlal")] +) + +(define_insn "msubsidi4" + 
[(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 3 "register_operand" "r") + (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI + (match_operand:SI 2 "register_operand" "r")))))] + "" + "msubl\\t%0, %w1, %w2, %3" + [(set_attr "type" "mlal")] +) + +(define_insn "*mulsidi_neg" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (neg:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))] + "" + "mnegl\\t%0, %w1, %w2" + [(set_attr "type" "mull")] +) + +(define_insn "muldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI + (ANY_EXTEND:TI (match_operand:DI 1 "register_operand" "r")) + (ANY_EXTEND:TI (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "mulh\\t%0, %1, %2" + [(set_attr "type" "mull")] +) + +(define_insn "div3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_DIV:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")))] + "" + "div\\t%0, %1, %2" + [(set_attr "type" "div")] +) + +;; zero_extend version of above +(define_insn "*divsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "div\\t%w0, %w1, %w2" + [(set_attr "type" "div")] +) + +;; ------------------------------------------------------------------- +;; Comparison insns +;; ------------------------------------------------------------------- + +(define_insn "*cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))] + "" + "@ + cmp\\t%0, %1 + cmp\\t%0, %1 + cmn\\t%0, #%n1" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*cmp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:GPF 0 "register_operand" "w,w") + (match_operand:GPF 1 "aarch64_fp_compare_operand" "Y,w")))] + "TARGET_FLOAT" + "@ + fcmp\\t%0, #0.0 + fcmp\\t%0, %1" + [(set_attr "type" "fcmp")] +) + +(define_insn "*cmpe" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:GPF 0 "register_operand" "w,w") + (match_operand:GPF 1 "aarch64_fp_compare_operand" "Y,w")))] + "TARGET_FLOAT" + "@ + fcmpe\\t%0, #0.0 + fcmpe\\t%0, %1" + [(set_attr "type" "fcmp")] +) + +(define_insn "*cmp_swp__reg" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ASHIFT:GPI + (match_operand:GPI 0 "register_operand" "r") + (match_operand:QI 1 "aarch64_shift_imm_" "n")) + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))] + "" + "cmp\\t%2, %0, %1" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*cmp_swp__reg" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ANY_EXTEND:GPI + (match_operand:ALLX 0 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "cmp\\t%1, %0, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*cmp_swp__shft_" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ashift:GPI + (ANY_EXTEND:GPI + (match_operand:ALLX 0 "register_operand" "r")) + (match_operand 1 "aarch64_imm3" "Ui3")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "cmp\\t%2, %0, xt %1" + [(set_attr "type" "alus_ext")] +) + +;; ------------------------------------------------------------------- +;; Store-flag and conditional select insns +;; ------------------------------------------------------------------- + 
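Editor's illustrative aside, not part of the original patch: the cstore and cmov patterns that follow are what let a branchless C comparison or ternary lower onto CSET/CSEL-family instructions instead of a compare-and-branch. A minimal C sketch follows; the function names and register choices are assumptions, and the assembly shown in the comments is only the roughly expected output for this backend.

/* Hedged sketch: names and exact register allocation are illustrative
   assumptions, not taken from the patch.  */

int is_less (int a, int b)
{
  /* Store-flag (cstore) pattern: roughly  cmp w0, w1 ; cset w0, lt  */
  return a < b;
}

long select_nonzero (long c, long x, long y)
{
  /* Conditional-select (cmov) pattern: roughly  cmp x0, 0 ; csel x0, x1, x2, ne  */
  return c ? x : y;
}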
+(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand:GPI 2 "register_operand" "") + (match_operand:GPI 3 "aarch64_plus_operand" "")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_insn "*cstore_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (match_operator:ALLI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "" + "cset\\t%0, %m1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of the above +(define_insn "*cstoresi_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "" + "cset\\t%w0, %m1" + [(set_attr "type" "csel")] +) + +(define_insn "cstore_neg" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "" + "csetm\\t%0, %m1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of the above +(define_insn "*cstoresi_neg_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))))] + "" + "csetm\\t%w0, %m1" + [(set_attr "type" "csel")] +) + +(define_expand "cmov6" + [(set (match_operand:GPI 0 "register_operand" "") + (if_then_else:GPI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand:GPI 2 "register_operand" "") + (match_operand:GPI 3 "aarch64_plus_operand" "")]) + (match_operand:GPI 4 "register_operand" "") + (match_operand:GPI 5 "register_operand" "")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_expand "cmov6" + [(set (match_operand:GPF 0 "register_operand" "") + (if_then_else:GPF + (match_operator 1 "aarch64_comparison_operator" + [(match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")]) + (match_operand:GPF 4 "register_operand" "") + (match_operand:GPF 5 "register_operand" "")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_insn "*cmov_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r,r,r,r") + (if_then_else:ALLI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:ALLI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") + (match_operand:ALLI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))] + "!((operands[3] == const1_rtx && operands[4] == constm1_rtx) + || (operands[3] == constm1_rtx && operands[4] == const1_rtx))" + ;; Final two alternatives should be unreachable, but included for completeness + "@ + csel\\t%0, %3, %4, %m1 + csinv\\t%0, %3, zr, %m1 + csinv\\t%0, %4, zr, %M1 + csinc\\t%0, %3, zr, %m1 + 
csinc\\t%0, %4, zr, %M1 + mov\\t%0, -1 + mov\\t%0, 1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of above +(define_insn "*cmovsi_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r") + (zero_extend:DI + (if_then_else:SI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") + (match_operand:SI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1"))))] + "!((operands[3] == const1_rtx && operands[4] == constm1_rtx) + || (operands[3] == constm1_rtx && operands[4] == const1_rtx))" + ;; Final two alternatives should be unreachable, but included for completeness + "@ + csel\\t%w0, %w3, %w4, %m1 + csinv\\t%w0, %w3, wzr, %m1 + csinv\\t%w0, %w4, wzr, %M1 + csinc\\t%w0, %w3, wzr, %m1 + csinc\\t%w0, %w4, wzr, %M1 + mov\\t%w0, -1 + mov\\t%w0, 1" + [(set_attr "type" "csel")] +) + +(define_insn "*cmov_insn" + [(set (match_operand:GPF 0 "register_operand" "=w") + (if_then_else:GPF + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:GPF 3 "register_operand" "w") + (match_operand:GPF 4 "register_operand" "w")))] + "TARGET_FLOAT" + "fcsel\\t%0, %3, %4, %m1" + [(set_attr "type" "fcsel")] +) + +(define_expand "movcc" + [(set (match_operand:ALLI 0 "register_operand" "") + (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") + (match_operand:ALLI 2 "register_operand" "") + (match_operand:ALLI 3 "register_operand" "")))] + "" + { + rtx ccreg; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } +) + +(define_expand "movcc" + [(set (match_operand:GPI 0 "register_operand" "") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") + (match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")))] + "" + { + rtx ccreg; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } +) + +(define_insn "*csinc2_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" + [(match_operand:CC 3 "cc_register" "") (const_int 0)]) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "csinc\\t%0, %1, %1, %M2" + [(set_attr "type" "csel")] +) + +(define_insn "csinc3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (plus:GPI (match_operand:GPI 3 "register_operand" "r") + (const_int 1)) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csinc\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +(define_insn "*csinv3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (not:GPI (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csinv\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +(define_insn "*csneg3_insn" + [(set 
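Editor's note, not part of the patch: the csinc/csinv/csneg patterns above catch conditional increment, invert and negate idioms; an illustrative sketch of the source shapes involved, with no guarantee the compiler picks these forms in every case.

long cond_inc (long a, long b, long x, long y)
{
  return a < b ? x + 1 : y;       /* csinc pattern  */
}

long cond_not (long a, long b, long x, long y)
{
  return a < b ? ~x : y;          /* csinv pattern  */
}

long cond_neg (long a, long b, long x, long y)
{
  return a < b ? -x : y;          /* csneg pattern  */
}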
(match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (neg:GPI (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csneg\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +;; ------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------- + +(define_insn "3" + [(set (match_operand:GPI 0 "register_operand" "=r,rk") + (LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r") + (match_operand:GPI 2 "aarch64_logical_operand" "r,")))] + "" + "\\t%0, %1, %2" + [(set_attr "type" "logic_reg,logic_imm")] +) + +;; zero_extend version of above +(define_insn "*si3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,rk") + (zero_extend:DI + (LOGICAL:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_logical_operand" "r,K"))))] + "" + "\\t%w0, %w1, %w2" + [(set_attr "type" "logic_reg,logic_imm")] +) + +(define_insn "*and3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 1 "register_operand" "%r,r") + (match_operand:GPI 2 "aarch64_logical_operand" "r,")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (and:GPI (match_dup 1) (match_dup 2)))] + "" + "ands\\t%0, %1, %2" + [(set_attr "type" "logics_reg,logics_imm")] +) + +;; zero_extend version of above +(define_insn "*andsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_logical_operand" "r,K")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "" + "ands\\t%w0, %w1, %w2" + [(set_attr "type" "logics_reg,logics_imm")] +) + +(define_insn "*and_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))] + "" + "ands\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*and_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2)) + (match_dup 3))))] + "" + "ands\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "*_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*_si3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (LOGICAL:SI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + 
"\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "mvn\\t%0, %1" + [(set_attr "type" "logic_reg")] +) + +(define_insn "*one_cmpl_2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (SHIFT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "mvn\\t%0, %1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +(define_insn "*_one_cmpl3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "\\t%0, %2, %1" + [(set_attr "type" "logic_reg")] +) + +(define_insn "*and_one_cmpl3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI (match_dup 1)) (match_dup 2)))] + "" + "bics\\t%0, %2, %1" + [(set_attr "type" "logics_reg")] +) + +;; zero_extend version of above +(define_insn "*and_one_cmplsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (not:SI (match_dup 1)) (match_dup 2))))] + "" + "bics\\t%w0, %w2, %w1" + [(set_attr "type" "logics_reg")] +) + +(define_insn "*_one_cmpl_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "*and_one_cmpl_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI + (SHIFT:GPI + (match_dup 1) (match_dup 2))) (match_dup 3)))] + "" + "bics\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*and_one_cmpl_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n"))) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI + (not:SI + (SHIFT:SI (match_dup 1) (match_dup 2))) (match_dup 3))))] + "" + "bics\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "clz2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (clz:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "clz\\t%0, %1" + [(set_attr "type" "clz")] +) + +(define_expand "ffs2" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "" + { + rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); + rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); + + emit_insn (gen_rbit2 (operands[0], operands[1])); + emit_insn (gen_clz2 (operands[0], 
operands[0])); + emit_insn (gen_csinc3_insn (operands[0], x, ccreg, operands[0], const0_rtx)); + DONE; + } +) + +(define_insn "clrsb2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))] + "" + "cls\\t%0, %1" + [(set_attr "type" "clz")] +) + +(define_insn "rbit2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] + "" + "rbit\\t%0, %1" + [(set_attr "type" "rbit")] +) + +(define_expand "ctz2" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "" + { + emit_insn (gen_rbit2 (operands[0], operands[1])); + emit_insn (gen_clz2 (operands[0], operands[0])); + DONE; + } +) + +(define_insn "*and3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 0 "register_operand" "%r,r") + (match_operand:GPI 1 "aarch64_logical_operand" "r,")) + (const_int 0)))] + "" + "tst\\t%0, %1" + [(set_attr "type" "logics_reg")] +) + +(define_insn "*and_3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 0 "register_operand" "r") + (match_operand:QI 1 "aarch64_shift_imm_" "n")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0)))] + "" + "tst\\t%2, %0, %1" + [(set_attr "type" "logics_shift_imm")] +) + +;; ------------------------------------------------------------------- +;; Shifts +;; ------------------------------------------------------------------- + +(define_expand "3" + [(set (match_operand:GPI 0 "register_operand") + (ASHIFT:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "ashl3" + [(set (match_operand:SHORT 0 "register_operand") + (ashift:SHORT (match_operand:SHORT 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "rotr3" + [(set (match_operand:GPI 0 "register_operand") + (rotatert:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "rotl3" + [(set (match_operand:GPI 0 "register_operand") + (rotatert:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + /* (SZ - cnt) % SZ == -cnt % SZ */ + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT ((-INTVAL (operands[2])) + & (GET_MODE_BITSIZE (mode) - 1)); + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + else + operands[2] = expand_simple_unop (QImode, NEG, operands[2], + NULL_RTX, 1); + } +) + +;; Logical left shift using SISD or Integer instruction +(define_insn "*aarch64_ashl_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" 
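Editor's note, not part of the patch: a sketch of how the count-trailing-zero and rotate expanders above behave at the source level. ctz is synthesized as RBIT + CLZ exactly as the ctz expander does, and ffs additionally uses CSINC to add one and return 0 for a zero input; the shift expanders mask constant counts to the mode width, and rotate-left is rewritten as a negated rotate-right.

#include <stdint.h>

int trailing_zeros (uint64_t x)
{
  return x ? __builtin_ctzll (x) : 64;   /* rbit + clz  */
}

int find_first_set (uint64_t x)
{
  return __builtin_ffsll (x);            /* rbit + clz + csinc; 0 for x == 0  */
}

/* The rotate-left expander relies on (SZ - n) % SZ == -n % SZ, i.e. a left
   rotate is a right rotate by the negated count.  A plain-C equivalent:  */
uint32_t rotl32 (uint32_t x, unsigned n)
{
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}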
"w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + shl\t%0, %1, %2 + ushl\t%0, %1, %2 + lsl\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm, neon_shift_reg,shift_reg")] +) + +;; Logical right shift using SISD or Integer instruction +(define_insn "*aarch64_lshr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (lshiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + ushr\t%0, %1, %2 + # + lsr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (lshiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (lshiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] + "" +) + +;; Arithmetic right shift using SISD or Integer instruction +(define_insn "*aarch64_ashr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + "" + "@ + sshr\t%0, %1, %2 + # + asr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (ashiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (ashiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] + "" +) + +(define_insn "*aarch64_sisd_ushl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_USHL))] + "TARGET_SIMD" + "ushl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_ushl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_USHL_2S))] + "TARGET_SIMD" + "ushl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sisd_sshl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_SSHL))] + "TARGET_SIMD" + "sshl\t%d0, %d1, 
%d2" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sshl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SSHL_2S))] + "TARGET_SIMD" + "sshl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sisd_neg_qi" + [(set (match_operand:QI 0 "register_operand" "=w") + (unspec:QI [(match_operand:QI 1 "register_operand" "w")] + UNSPEC_SISD_NEG))] + "TARGET_SIMD" + "neg\t%d0, %d1" + [(set_attr "simd" "yes") + (set_attr "type" "neon_neg")] +) + +;; Rotate right +(define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotatert:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs")))] + "" + "ror\\t%0, %1, %2" + [(set_attr "type" "shift_reg")] +) + +;; zero_extend version of above +(define_insn "*si3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss"))))] + "" + "\\t%w0, %w1, %w2" + [(set_attr "type" "shift_reg")] +) + +(define_insn "*ashl3_insn" + [(set (match_operand:SHORT 0 "register_operand" "=r") + (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss")))] + "" + "lsl\\t%0, %1, %2" + [(set_attr "type" "shift_reg")] +) + +(define_insn "*3_insn" + [(set (match_operand:SHORT 0 "register_operand" "=r") + (ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "\t%w0, %w1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*extr5_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n"))))] + "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (mode))" + "extr\\t%0, %1, %2, %4" + [(set_attr "type" "shift_imm")] +) + +;; zero_extend version of the above +(define_insn "*extrsi5_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n")))))] + "UINTVAL (operands[3]) < 32 && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" + "extr\\t%w0, %w1, %w2, %4" + [(set_attr "type" "shift_imm")] +) + +(define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotate:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "ror\\t%0, %1, %3"; +} + [(set_attr "type" "shift_imm")] +) + +;; zero_extend version of the above +(define_insn "*rorsi3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + 
"UINTVAL (operands[2]) < 32" +{ + operands[3] = GEN_INT (32 - UINTVAL (operands[2])); + return "ror\\t%w0, %w1, %3"; +} + [(set_attr "type" "shift_imm")] +) + +(define_insn "*_ashl" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_EXTEND:GPI + (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "bfiz\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*zero_extend_lshr" + [(set (match_operand:GPI 0 "register_operand" "=r") + (zero_extend:GPI + (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "ubfx\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*extend_ashr" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI + (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "sbfx\\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +;; ------------------------------------------------------------------- +;; Bitfields +;; ------------------------------------------------------------------- + +(define_expand "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_EXTRACT:DI (match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n") + (match_operand 3 "const_int_operand" "n")))] + "" + "" +) + +(define_insn "*" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n") + (match_operand 3 "const_int_operand" "n")))] + "" + "bfx\\t%0, %1, %3, %2" + [(set_attr "type" "bfm")] +) + +;; Bitfield Insert (insv) +(define_expand "insv" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPI 3 "general_operand"))] + "" +{ + unsigned HOST_WIDE_INT width = UINTVAL (operands[1]); + unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]); + rtx value = operands[3]; + + if (width == 0 || (pos + width) > GET_MODE_BITSIZE (mode)) + FAIL; + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1; + + /* Prefer AND/OR for inserting all zeros or all ones. */ + if ((UINTVAL (value) & mask) == 0 + || (UINTVAL (value) & mask) == mask) + FAIL; + + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. 
*/ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (mode, value); +}) + +(define_insn "*insv_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfi\\t%0, %3, %2, %1" + [(set_attr "type" "bfm")] +) + +(define_insn "*extr_insv_lower_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (const_int 0)) + (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r") + (match_dup 1) + (match_operand 3 "const_int_operand" "n")))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[3]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfxil\\t%0, %2, %3, %1" + [(set_attr "type" "bfm")] +) + +(define_insn "*_shft_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < " +{ + operands[3] = ( <= ( - UINTVAL (operands[2]))) + ? GEN_INT () + : GEN_INT ( - UINTVAL (operands[2])); + return "bfiz\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below + +(define_insn "*andim_ashift_bfiz" + [(set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] + "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0 + && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0" + "ubfiz\\t%0, %1, %2, %P3" + [(set_attr "type" "bfm")] +) + +(define_insn "bswap2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (bswap:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "rev\\t%0, %1" + [(set_attr "type" "rev")] +) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "rev16\\t%w0, %w1" + [(set_attr "type" "rev")] +) + +;; zero_extend version of above +(define_insn "*bswapsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (bswap:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "rev\\t%w0, %w1" + [(set_attr "type" "rev")] +) + +;; ------------------------------------------------------------------- +;; Floating-point intrinsics +;; ------------------------------------------------------------------- + +;; frint floating-point round to integral standard patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. + +(define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRINT))] + "TARGET_FLOAT" + "frint\\t%0, %1" + [(set_attr "type" "f_rint")] +) + +;; frcvt floating-point round to integer and convert standard patterns. +;; Expands to lbtrunc, lceil, lfloor, lround. 
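Editor's note, not part of the patch: the byte-reverse and floating-point rounding patterns above map onto familiar builtins and C99 functions; the usual mapping is shown in the comments, hedged because the final selection depends on options and optimization level.

#include <stdint.h>
#include <math.h>

uint32_t swap32 (uint32_t x)
{
  return __builtin_bswap32 (x);   /* bswap pattern -> REV  */
}

uint16_t swap16 (uint16_t x)
{
  return __builtin_bswap16 (x);   /* bswaphi2 -> REV16  */
}

/* The frint patterns implement the rounding functions directly.  */
double round_down (double x)
{
  return floor (x);               /* typically FRINTM  */
}

double round_current_mode (double x)
{
  return rint (x);                /* typically FRINTX  */
}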
+(define_insn "l2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FCVT)))] + "TARGET_FLOAT" + "fcvt\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +;; fma - no throw + +(define_insn "fma4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w")))] + "TARGET_FLOAT" + "fmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fnma4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w")))] + "TARGET_FLOAT" + "fmsub\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fms4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + "TARGET_FLOAT" + "fnmsub\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fnms4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w") + (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + "TARGET_FLOAT" + "fnmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +;; If signed zeros are ignored, -(a * b + c) = -a * b - c. +(define_insn "*fnmadd4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && TARGET_FLOAT" + "fnmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +;; ------------------------------------------------------------------- +;; Floating-point conversions +;; ------------------------------------------------------------------- + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%d0, %s1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%s0, %d1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "fix_trunc2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvtzs\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "fixuns_trunc2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvtzu\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "float2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (float:GPF (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_FLOAT" + "scvtf\\t%0, %1" + [(set_attr "type" "f_cvti2f")] +) + +(define_insn "floatuns2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_FLOAT" + "ucvtf\\t%0, %1" + [(set_attr "type" "f_cvt")] +) + +;; ------------------------------------------------------------------- +;; Floating-point arithmetic +;; 
------------------------------------------------------------------- + +(define_insn "add3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (plus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fadd\\t%0, %1, %2" + [(set_attr "type" "fadd")] +) + +(define_insn "sub3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fsub\\t%0, %1, %2" + [(set_attr "type" "fadd")] +) + +(define_insn "mul3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fmul\\t%0, %1, %2" + [(set_attr "type" "fmul")] +) + +(define_insn "*fnmul3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF + (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fnmul\\t%0, %1, %2" + [(set_attr "type" "fmul")] +) + +(define_insn "div3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (div:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fdiv\\t%0, %1, %2" + [(set_attr "type" "fdiv")] +) + +(define_insn "neg2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fneg\\t%0, %1" + [(set_attr "type" "ffarith")] +) + +(define_insn "sqrt2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fsqrt\\t%0, %1" + [(set_attr "type" "fsqrt")] +) + +(define_insn "abs2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fabs\\t%0, %1" + [(set_attr "type" "ffarith")] +) + +;; Given that smax/smin do not specify the result when either input is NaN, +;; we could use either FMAXNM or FMAX for smax, and either FMINNM or FMIN +;; for smin. + +(define_insn "smax3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (smax:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fmaxnm\\t%0, %1, %2" + [(set_attr "type" "f_minmax")] +) + +(define_insn "smin3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (smin:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fminnm\\t%0, %1, %2" + [(set_attr "type" "f_minmax")] +) + +;; ------------------------------------------------------------------- +;; Reload support +;; ------------------------------------------------------------------- + +(define_expand "aarch64_reload_mov" + [(set (match_operand:TX 0 "register_operand" "=w") + (match_operand:TX 1 "register_operand" "w")) + (clobber (match_operand:DI 2 "register_operand" "=&r")) + ] + "" + { + rtx op0 = simplify_gen_subreg (TImode, operands[0], mode, 0); + rtx op1 = simplify_gen_subreg (TImode, operands[1], mode, 0); + gen_aarch64_movtilow_tilow (op0, op1); + gen_aarch64_movdi_tihigh (operands[2], op1); + gen_aarch64_movtihigh_di (op0, operands[2]); + DONE; + } +) + +;; The following secondary reload helpers patterns are invoked +;; after or during reload as we don't want these patterns to start +;; kicking in during the combiner. 
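Editor's note, not part of the patch: the scalar floating-point arithmetic patterns above map one-to-one onto FADD, FSUB, FMUL, FDIV, FNEG, FABS and FSQRT, and smax/smin use FMAXNM/FMINNM; as the in-file comment notes, smax/smin leave NaN behaviour unspecified, so that mapping is a choice rather than a requirement. Illustrative C only.

#include <math.h>

double magnitude (double a, double b)
{
  return sqrt (a * a + b * b);    /* fmul, fadd (possibly fmadd), fsqrt  */
}

double clamp_non_negative (double x)
{
  return x > 0.0 ? x : 0.0;       /* may become fmaxnm under -ffast-math  */
}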
+ +(define_insn "aarch64_movdi_low" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI (match_operand:TX 1 "register_operand" "w")))] + "reload_completed || reload_in_progress" + "fmov\\t%x0, %d1" + [(set_attr "type" "f_mrc") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movdi_high" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TX (match_operand:TX 1 "register_operand" "w") + (const_int 64))))] + "reload_completed || reload_in_progress" + "fmov\\t%x0, %1.d[1]" + [(set_attr "type" "f_mrc") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movhigh_di" + [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w") + (const_int 64) (const_int 64)) + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] + "reload_completed || reload_in_progress" + "fmov\\t%0.d[1], %x1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movlow_di" + [(set (match_operand:TX 0 "register_operand" "=w") + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] + "reload_completed || reload_in_progress" + "fmov\\t%d0, %x1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movtilow_tilow" + [(set (match_operand:TI 0 "register_operand" "=w") + (zero_extend:TI + (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] + "reload_completed || reload_in_progress" + "fmov\\t%d0, %d1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +;; There is a deliberate reason why the parameters of high and lo_sum's +;; don't have modes for ADRP and ADD instructions. This is to allow high +;; and lo_sum's to be used with the labels defining the jump tables in +;; rodata section. + +(define_expand "add_losym" + [(set (match_operand 0 "register_operand" "=r") + (lo_sum (match_operand 1 "register_operand" "r") + (match_operand 2 "aarch64_valid_symref" "S")))] + "" +{ + enum machine_mode mode = GET_MODE (operands[0]); + + emit_insn ((mode == DImode + ? 
gen_add_losym_di + : gen_add_losym_si) (operands[0], + operands[1], + operands[2])); + DONE; +}) + +(define_insn "add_losym_" + [(set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand 2 "aarch64_valid_symref" "S")))] + "" + "add\\t%0, %1, :lo12:%a2" + [(set_attr "type" "alu_reg")] +) + +(define_insn "ldr_got_small_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(mem:PTR (lo_sum:PTR + (match_operand:PTR 1 "register_operand" "r") + (match_operand:PTR 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC))] + "" + "ldr\\t%0, [%1, #:got_lo12:%a2]" + [(set_attr "type" "load1")] +) + +(define_insn "ldr_got_small_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(mem:SI (lo_sum:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC)))] + "TARGET_ILP32" + "ldr\\t%w0, [%1, #:got_lo12:%a2]" + [(set_attr "type" "load1")] +) + +(define_insn "ldr_got_tiny" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] + UNSPEC_GOTTINYPIC))] + "" + "ldr\\t%0, %L1" + [(set_attr "type" "load1")] +) + +(define_insn "aarch64_load_tp_hard" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TLS))] + "" + "mrs\\t%0, tpidr_el0" + [(set_attr "type" "mrs")] +) + +;; The TLS ABI specifically requires that the compiler does not schedule +;; instructions in the TLS stubs, in order to enable linker relaxation. +;; Therefore we treat the stubs as an atomic sequence. +(define_expand "tlsgd_small" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (mem:DI (match_dup 2)) (const_int 1))) + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM))])] + "" +{ + operands[2] = aarch64_tls_get_addr (); +}) + +(define_insn "*tlsgd_small" + [(set (match_operand 0 "register_operand" "") + (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM)) + ] + "" + "adrp\\tx0, %A1\;add\\tx0, x0, %L1\;bl\\t%2\;nop" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +(define_insn "tlsie_small" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] + UNSPEC_GOTSMALLTLS))] + "" + "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" + [(set_attr "type" "load1") + (set_attr "length" "8")] +) + +(define_insn "tlsle_small" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "aarch64_tls_le_symref" "S")] + UNSPEC_GOTSMALLTLS))] + "" + "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" + [(set_attr "type" "alu_reg") + (set_attr "length" "8")] +) + +(define_insn "tlsdesc_small" + [(set (reg:DI R0_REGNUM) + (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] + UNSPEC_TLSDESC)) + (clobber (reg:DI LR_REGNUM)) + (clobber (match_scratch:DI 1 "=r"))] + "TARGET_TLS_DESC" + "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:DI 0 "register_operand" "rk") + (match_operand:DI 1 "register_operand" "rk")] + UNSPEC_PRLG_STK))] + "" + "" + [(set_attr "length" "0")] +) + +;; Named pattern for 
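Editor's note, not part of the patch: in source terms, the symbol and TLS patterns above cover taking the address of a global (ADRP + ADD :lo12:, the add_losym patterns), PIC accesses through the GOT (ldr_got_small), and __thread variables via either TLS descriptors or the initial-/local-exec sequences ending in a read of tpidr_el0. Which sequence is chosen depends on -fpic and -mtls-dialect; the sketch below is illustrative only.

int global_counter;
__thread int per_thread_counter;

int *global_addr (void)
{
  return &global_counter;         /* adrp + add ..., :lo12:global_counter  */
}

int read_tls (void)
{
  return per_thread_counter;      /* tlsdesc or tlsle sequence + mrs tpidr_el0  */
}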
expanding thread pointer reference. +(define_expand "get_thread_pointerdi" + [(match_operand:DI 0 "register_operand" "=r")] + "" +{ + rtx tmp = aarch64_load_tp (operands[0]); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + DONE; +}) + +;; AdvSIMD Stuff +(include "aarch64-simd.md") + +;; Atomic Operations +(include "atomics.md") diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.opt b/gcc-4.9/gcc/config/aarch64/aarch64.opt new file mode 100644 index 000000000..f5a15b729 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.opt @@ -0,0 +1,118 @@ +; Machine description for AArch64 architecture. +; Copyright (C) 2009-2014 Free Software Foundation, Inc. +; Contributed by ARM Ltd. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. +; +; GCC is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/aarch64/aarch64-opts.h + +; The TLS dialect names to use with -mtls-dialect. + +Enum +Name(tls_type) Type(enum aarch64_tls_type) +The possible TLS dialects: + +EnumValue +Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) + +EnumValue +Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) + +; The code model option names for -mcmodel. + +Enum +Name(cmodel) Type(enum aarch64_code_model) +The code model option names for -mcmodel: + +EnumValue +Enum(cmodel) String(tiny) Value(AARCH64_CMODEL_TINY) + +EnumValue +Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL) + +EnumValue +Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) + +; The cpu/arch option names to use in cpu/arch selection. 
+ +Variable +const char *aarch64_arch_string + +Variable +const char *aarch64_cpu_string + +Variable +const char *aarch64_tune_string + +mbig-endian +Target Report RejectNegative Mask(BIG_END) +Assume target CPU is configured as big endian + +mgeneral-regs-only +Target Report RejectNegative Mask(GENERAL_REGS_ONLY) +Generate code which uses only the general registers + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_END) +Assume target CPU is configured as little endian + +mcmodel= +Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) +Specify the code model + +mstrict-align +Target Report RejectNegative Mask(STRICT_ALIGN) +Don't assume that unaligned accesses are handled by the system + +momit-leaf-frame-pointer +Target Report Save Var(flag_omit_leaf_frame_pointer) Init(1) +Omit the frame pointer in leaf functions + +mtls-dialect= +Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) +Specify TLS dialect + +march= +Target RejectNegative ToLower Joined Var(aarch64_arch_string) +-march=ARCH Use features of architecture ARCH + +mcpu= +Target RejectNegative ToLower Joined Var(aarch64_cpu_string) +-mcpu=CPU Use features of and optimize for CPU + +mtune= +Target RejectNegative ToLower Joined Var(aarch64_tune_string) +-mtune=CPU Optimize for CPU + +mabi= +Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) +-mabi=ABI Generate code that conforms to the specified ABI + +mlra +Target Report Var(aarch64_lra_flag) Init(1) Save +Use LRA instead of reload (transitional) + +Enum +Name(aarch64_abi) Type(int) +Known AArch64 ABIs (for use with the -mabi= option): + +EnumValue +Enum(aarch64_abi) String(ilp32) Value(AARCH64_ABI_ILP32) + +EnumValue +Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64) diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h new file mode 100644 index 000000000..747a292ba --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -0,0 +1,25403 @@ +/* ARM NEON intrinsics include file. + + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _AARCH64_NEON_H_ +#define _AARCH64_NEON_H_ + +#include + +#define __AARCH64_UINT64_C(__C) ((uint64_t) __C) +#define __AARCH64_INT64_C(__C) ((int64_t) __C) + +typedef __builtin_aarch64_simd_qi int8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_hi int16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_si int32x2_t + __attribute__ ((__vector_size__ (8))); +typedef int64_t int64x1_t; +typedef int32_t int32x1_t; +typedef int16_t int16x1_t; +typedef int8_t int8x1_t; +typedef double float64x1_t; +typedef __builtin_aarch64_simd_sf float32x2_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_poly8 poly8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_poly16 poly16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_uqi uint8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_uhi uint16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_usi uint32x2_t + __attribute__ ((__vector_size__ (8))); +typedef uint64_t uint64x1_t; +typedef uint32_t uint32x1_t; +typedef uint16_t uint16x1_t; +typedef uint8_t uint8x1_t; +typedef __builtin_aarch64_simd_qi int8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_hi int16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_si int32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_di int64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_sf float32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_df float64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly8 poly8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly16 poly16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly64 poly64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_uqi uint8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_uhi uint16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_usi uint32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_udi uint64x2_t + __attribute__ ((__vector_size__ (16))); + +typedef float float32_t; +typedef double float64_t; +typedef __builtin_aarch64_simd_poly8 poly8_t; +typedef __builtin_aarch64_simd_poly16 poly16_t; +typedef __builtin_aarch64_simd_poly64 poly64_t; +typedef __builtin_aarch64_simd_poly128 poly128_t; + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct 
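Editor's note, not part of the patch: the vector typedefs above are ordinary GCC vector types, so they support direct initialization and arithmetic, and the "xN" aggregate types exist to carry multi-register results (for example from the de-interleaving load intrinsics). Illustrative use, assuming the header is included as <arm_neon.h> on an AArch64 target.

#include <arm_neon.h>

int32x4_t make_vec (void)
{
  int32x4_t v = { 1, 2, 3, 4 };   /* one 128-bit Q register's worth of int32  */
  return v + v;                   /* GCC vector-extension arithmetic  */
}

int8x8x2_t two_d_regs (int8x8_t a, int8x8_t b)
{
  int8x8x2_t pair = { { a, b } }; /* .val[0] and .val[1] each hold one D register  */
  return pair;
}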
uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct float64x2x2_t +{ + float64x2_t val[2]; +} float64x2x2_t; + +typedef struct float64x1x2_t +{ + float64x1_t val[2]; +} float64x1x2_t; + +typedef struct poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct float64x2x3_t +{ + float64x2_t val[3]; +} float64x2x3_t; + +typedef struct float64x1x3_t +{ + float64x1_t val[3]; +} float64x1x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef 
struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef struct float64x2x4_t +{ + float64x2_t val[4]; +} float64x2x4_t; + +typedef struct float64x1x4_t +{ + float64x1_t val[4]; +} float64x1x4_t; + +typedef struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + +/* vget_lane internal macros. */ + +#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ + (__cast_ret \ + __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b)) + +#define __aarch64_vget_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v2sf, , , __a, __b) +#define __aarch64_vget_lane_f64(__a, __b) (__a) + +#define __aarch64_vget_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) + +#define __aarch64_vget_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, , ,__a, __b) +#define __aarch64_vget_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, , ,__a, __b) +#define __aarch64_vget_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v2si, , ,__a, __b) +#define __aarch64_vget_lane_s64(__a, __b) (__a) + +#define __aarch64_vget_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) +#define __aarch64_vget_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b) +#define __aarch64_vget_lane_u64(__a, __b) (__a) + +#define __aarch64_vgetq_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v4sf, , , __a, __b) +#define __aarch64_vgetq_lane_f64(__a, __b) \ + __aarch64_vget_lane_any (v2df, , , __a, __b) + +#define __aarch64_vgetq_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) + +#define __aarch64_vgetq_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, , ,__a, __b) +#define __aarch64_vgetq_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, , ,__a, __b) +#define __aarch64_vgetq_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v4si, , ,__a, __b) +#define __aarch64_vgetq_lane_s64(__a, __b) \ + __aarch64_vget_lane_any (v2di, , ,__a, __b) + +#define __aarch64_vgetq_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) +#define __aarch64_vgetq_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) +#define __aarch64_vgetq_lane_u64(__a, __b) \ + __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) + +/* __aarch64_vdup_lane internal macros. 
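Editor's note, not part of the patch: the __aarch64_vget_lane_* macros above back the public vget_lane/vgetq_lane intrinsics, which read a single element out of a vector; the lane index must be a constant expression. Illustrative use only.

#include <arm_neon.h>

float first_lane (float32x2_t v)
{
  return vget_lane_f32 (v, 0);
}

uint8_t last_lane (uint8x16_t v)
{
  return vgetq_lane_u8 (v, 15);
}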
*/ +#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \ + vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b)) + +#define __aarch64_vdup_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , , __a, __b) +#define __aarch64_vdup_lane_f64(__a, __b) (__a) +#define __aarch64_vdup_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , , __a, __b) +#define __aarch64_vdup_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , , __a, __b) +#define __aarch64_vdup_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , , __a, __b) +#define __aarch64_vdup_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , , __a, __b) +#define __aarch64_vdup_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , , __a, __b) +#define __aarch64_vdup_lane_s64(__a, __b) (__a) +#define __aarch64_vdup_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , , __a, __b) +#define __aarch64_vdup_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , , __a, __b) +#define __aarch64_vdup_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , , __a, __b) +#define __aarch64_vdup_lane_u64(__a, __b) (__a) + +/* __aarch64_vdup_laneq internal macros. */ +#define __aarch64_vdup_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , q, __a, __b) +#define __aarch64_vdup_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, , q, __a, __b) +#define __aarch64_vdup_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , q, __a, __b) +#define __aarch64_vdup_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , q, __a, __b) +#define __aarch64_vdup_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , q, __a, __b) +#define __aarch64_vdup_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , q, __a, __b) +#define __aarch64_vdup_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , q, __a, __b) +#define __aarch64_vdup_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, , q, __a, __b) +#define __aarch64_vdup_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , q, __a, __b) +#define __aarch64_vdup_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , q, __a, __b) +#define __aarch64_vdup_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , q, __a, __b) +#define __aarch64_vdup_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, , q, __a, __b) + +/* __aarch64_vdupq_lane internal macros. */ +#define __aarch64_vdupq_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, , __a, __b) +#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a)) +#define __aarch64_vdupq_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, , __a, __b) +#define __aarch64_vdupq_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, , __a, __b) +#define __aarch64_vdupq_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, , __a, __b) +#define __aarch64_vdupq_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, , __a, __b) +#define __aarch64_vdupq_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, , __a, __b) +#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a)) +#define __aarch64_vdupq_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, , __a, __b) +#define __aarch64_vdupq_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, , __a, __b) +#define __aarch64_vdupq_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, , __a, __b) +#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a)) + +/* __aarch64_vdupq_laneq internal macros. 
*/ +#define __aarch64_vdupq_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, q, q, __a, __b) + +/* vadd */ +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vadd_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vaddq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vaddq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline 
uint8x16_t __attribute__ ((__always_inline__)) +vaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_high_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t) 
__builtin_aarch64_saddwv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_high_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_high_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_high_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
+vaddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vraddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vraddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vraddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint32x4_t) __builtin_aarch64_addhn2v2di 
((int32x2_t) __a, + (int64x2_t) __b, + (int64x2_t) __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a, + (int64x2_t) __b, + (int64x2_t) __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdiv_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdiv_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdivq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdivq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a / __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmul_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmul_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmulq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vmulq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmulq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmulq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vand_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vand_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vand_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vand_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vand_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vand_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vand_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vand_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vandq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vandq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vandq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vandq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vandq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vandq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vandq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vandq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return 
__a & __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorr_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorr_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorr_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorr_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorr_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorr_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorr_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorr_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vorrq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vorrq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vorrq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vorrq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vorrq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vorrq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vorrq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vorrq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +veor_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +veor_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +veor_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +veor_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +veor_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +veor_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +veor_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +veor_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a ^ __b; +} + 
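The vand/vorr/veor intrinsics above lower to plain bitwise operations on the vector types, so they can be exercised directly from C. A minimal editorial usage sketch (not part of the patch; assumes an AArch64 toolchain with this arm_neon.h installed):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  uint8x8_t data = vcreate_u8 (0x8877665544332211ULL);  /* eight byte lanes  */
  uint8x8_t mask = vdup_n_u8 (0x0f);                     /* low-nibble mask   */

  uint8x8_t low  = vand_u8 (data, mask);   /* keep the low nibble of each lane   */
  uint8x8_t set  = vorr_u8 (data, mask);   /* force the low nibble on            */
  uint8x8_t flip = veor_u8 (data, mask);   /* toggle the low nibble of each lane */

  printf ("%#x %#x %#x\n", (unsigned) vget_lane_u8 (low, 0),
          (unsigned) vget_lane_u8 (set, 0), (unsigned) vget_lane_u8 (flip, 0));
  return 0;
}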
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +veorq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +veorq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +veorq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +veorq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +veorq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +veorq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +veorq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +veorq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbic_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbic_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbic_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbic_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbic_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbic_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbic_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbic_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbicq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbicq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbicq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbicq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbicq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbicq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbicq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbicq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a & ~__b; 
+} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorn_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorn_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorn_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorn_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorn_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vornq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vornq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vornq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vornq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vornq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vornq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vornq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vornq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsub_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vsub_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a - __b; 
+} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vsubq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_high_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int64x2_t) 
__builtin_aarch64_ssubl2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_high_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_high_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_high_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b); +} 
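Unlike the plain vadd forms earlier in the file, the vqadd family clamps each lane to the element type's range instead of wrapping. A short editorial sketch of the difference (not part of the patch; assumes an AArch64 toolchain providing this header):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  int8x8_t big = vdup_n_s8 (100);

  int8x8_t wrapped   = vadd_s8 (big, big);    /* 100 + 100 wraps mod 2^8 to -56 */
  int8x8_t saturated = vqadd_s8 (big, big);   /* clamped to INT8_MAX, i.e. 127  */

  printf ("vadd: %d  vqadd: %d\n",
          vget_lane_s8 (wrapped, 0), vget_lane_s8 (saturated, 0));
  return 0;
}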
+ +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsub_s64 (int64x1_t __a, int64x1_t __b) +{ + 
return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqneg_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqneg_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqneg_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqnegq_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqnegq_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqnegq_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_sqnegv4si 
(__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqabs_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqabs_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqabs_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqabsq_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqabsq_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqabsq_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_sqabsv4si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcreate_s8 (uint64_t __a) +{ + return (int8x8_t) __a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcreate_s16 (uint64_t __a) +{ + return (int16x4_t) __a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcreate_s32 (uint64_t __a) +{ + return (int32x2_t) __a; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vcreate_s64 (uint64_t __a) +{ + return (int64x1_t) __a; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcreate_f32 (uint64_t __a) +{ + return (float32x2_t) __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcreate_u8 (uint64_t __a) +{ + return (uint8x8_t) __a; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcreate_u16 (uint64_t __a) +{ + return (uint16x4_t) __a; +} + 
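The vcreate family simply reinterprets the 64-bit scalar's bit pattern as a 64-bit NEON vector, so the lane contents follow from the integer's bits. An editorial sketch (not part of the patch; assumes a little-endian AArch64 target, where lane 0 corresponds to the least significant bits):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  /* Four 16-bit lanes carved out of one 64-bit literal.  */
  uint16x4_t v = vcreate_u16 (0x0403020104030201ULL);

  printf ("lane0=%#x lane3=%#x\n",
          (unsigned) vget_lane_u16 (v, 0),    /* 0x0201, the low 16 bits  */
          (unsigned) vget_lane_u16 (v, 3));   /* 0x0403, the high 16 bits */
  return 0;
}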
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcreate_u32 (uint64_t __a) +{ + return (uint32x2_t) __a; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcreate_u64 (uint64_t __a) +{ + return (uint64x1_t) __a; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vcreate_f64 (uint64_t __a) +{ + return (float64x1_t) __builtin_aarch64_createdf (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcreate_p8 (uint64_t __a) +{ + return (poly8x8_t) __a; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vcreate_p16 (uint64_t __a) +{ + return (poly16x4_t) __a; +} + +/* vget_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vget_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vget_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_s8 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vget_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_s16 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vget_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_s32 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vget_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vget_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vget_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_u64 (__a, __b); +} + +/* vgetq_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vgetq_lane_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return 
__aarch64_vgetq_lane_p16 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vgetq_lane_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s8 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vgetq_lane_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s16 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vgetq_lane_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s32 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vgetq_lane_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vgetq_lane_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vgetq_lane_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vgetq_lane_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u64 (__a, __b); +} + +/* vreinterpret */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s8 (int8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s16 (int16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s32 (int32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s64 (int64x1_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f32 (float32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u8 (uint8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u16 (uint16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u64 (uint64x1_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p16 (poly16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) +{ + return (poly8x16_t) 
__builtin_aarch64_reinterpretv16qiv16qi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 (float32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s8 (int8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s16 (int16x4_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s32 (int32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u8 (uint8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u16 (uint16x4_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u32 (uint32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) +{ + return 
(poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 (int64x2_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s8 (int8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s16 (int16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s32 (int32x2_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u8 (uint8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u16 (uint16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u32 
(uint32x2_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u64 (uint64x1_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s8 (int8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s16 (int16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s32 (int32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); +} + +__extension__ static __inline int64x1_t __attribute__ 
((__always_inline__)) +vreinterpret_s64_u8 (uint8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u16 (uint16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u32 (uint32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ 
((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 
(uint8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u16 (uint16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p8 (poly8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p16 (poly16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s8 (int8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s16 (int16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t 
__a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s64 (int64x2_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f32 (float32x4_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u64 (uint64x2_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p8 (poly8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p16 (poly16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u64 (uint64x1_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_u16_s8 (int8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s16 (int16x8_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s32 (int32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u8 (uint8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u32 (uint32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
+vreinterpret_u32_p8 (poly8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +#define __GET_LOW(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64_t lo = vgetq_lane_u64 (tmp, 0); \ + return vreinterpret_##__TYPE##_u64 (lo); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + __GET_LOW (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_low_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + __GET_LOW (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + __GET_LOW (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + __GET_LOW (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + __GET_LOW (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + __GET_LOW (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t 
__a) +{ + return vgetq_lane_s64 (__a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + __GET_LOW (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + __GET_LOW (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + __GET_LOW (u32); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 (__a, 0); +} + +#undef __GET_LOW + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcombine_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcombine_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcombine_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcombine_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x2_t) __builtin_aarch64_combinedi (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcombine_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcombine_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcombine_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcombine_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcombine_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcombine_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x2_t) __builtin_aarch64_combinedf (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcombine_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vcombine_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +/* Start of temporary inline asm implementations. 
*/ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("saba %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("saba %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("saba %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("uaba %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("uaba %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("uaba %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("sabal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("sabal 
%0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("sabal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("sabal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("uabal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("uabal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("uabal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("saba %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + int16x8_t result; + __asm__ ("saba %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("saba %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("uaba %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("uaba %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("uaba %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabd_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabd_s8 (int8x8_t a, 
int8x8_t b) +{ + int8x8_t result; + __asm__ ("sabd %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabd_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("sabd %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabd_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("sabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vabd_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uabd %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vabd_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uabd %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vabd_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vabdd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("fabd %d0, %d1, %d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_high_s8 (int8x16_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_high_s16 (int16x8_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_high_s32 (int32x4_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_high_u8 (uint8x16_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_high_u16 (uint16x8_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_high_u32 (uint32x4_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_s8 (int8x8_t a, int8x8_t b) +{ + int16x8_t result; + __asm__ ("sabdl %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + 
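(Illustrative usage sketch only; this is not part of the header added by the patch. It shows how the widening absolute-difference and horizontal-add intrinsics defined in this hunk -- vabdl_u8, vaddq_u16, vaddlvq_u16, together with the standard vld1_u8 load -- might compose into a 16-byte sum-of-absolute-differences kernel. The helper name sad16 and the suggested compile invocation are assumptions, not taken from the patch.)

/* Minimal sketch, assuming an AArch64 toolchain, e.g.
   aarch64-linux-gnu-gcc -O2 -c sad16.c (assumed invocation).  */
#include <arm_neon.h>
#include <stdint.h>

static uint32_t
sad16 (const uint8_t *p, const uint8_t *q)
{
  /* Widening absolute differences of the low and high 8-byte halves.  */
  uint16x8_t lo = vabdl_u8 (vld1_u8 (p), vld1_u8 (q));
  uint16x8_t hi = vabdl_u8 (vld1_u8 (p + 8), vld1_u8 (q + 8));
  /* Add the two halves, then horizontally accumulate the eight 16-bit
     differences into a single 32-bit sum.  */
  return vaddlvq_u16 (vaddq_u16 (lo, hi));
}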
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_s16 (int16x4_t a, int16x4_t b) +{ + int32x4_t result; + __asm__ ("sabdl %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_s32 (int32x2_t a, int32x2_t b) +{ + int64x2_t result; + __asm__ ("sabdl %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_u8 (uint8x8_t a, uint8x8_t b) +{ + uint16x8_t result; + __asm__ ("uabdl %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_u16 (uint16x4_t a, uint16x4_t b) +{ + uint32x4_t result; + __asm__ ("uabdl %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_u32 (uint32x2_t a, uint32x2_t b) +{ + uint64x2_t result; + __asm__ ("uabdl %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabdq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabdq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fabd %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabdq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("sabd %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("sabd %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("sabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabdq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uabd %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uabd %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vabds_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("fabd 
%s0, %s1, %s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddlv_s8 (int8x8_t a) +{ + int16_t result; + __asm__ ("saddlv %h0,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddlv_s16 (int16x4_t a) +{ + int32_t result; + __asm__ ("saddlv %s0,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddlv_u8 (uint8x8_t a) +{ + uint16_t result; + __asm__ ("uaddlv %h0,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddlv_u16 (uint16x4_t a) +{ + uint32_t result; + __asm__ ("uaddlv %s0,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddlvq_s8 (int8x16_t a) +{ + int16_t result; + __asm__ ("saddlv %h0,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddlvq_s16 (int16x8_t a) +{ + int32_t result; + __asm__ ("saddlv %s0,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddlvq_s32 (int32x4_t a) +{ + int64_t result; + __asm__ ("saddlv %d0,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddlvq_u8 (uint8x16_t a) +{ + uint16_t result; + __asm__ ("uaddlv %h0,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddlvq_u16 (uint16x8_t a) +{ + uint32_t result; + __asm__ ("uaddlv %s0,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddlvq_u32 (uint32x4_t a) +{ + uint64_t result; + __asm__ ("uaddlv %d0,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("cls %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("cls %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("cls %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("cls %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("cls %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("cls %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vcopyq_lane_f32(a, b, c, d) \ + __extension__ \ + ({ \ + float32x4_t c_ = (c); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_f64(a, b, c, d) \ + __extension__ \ + ({ \ + float64x2_t c_ = (c); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_p8(a, b, c, d) \ + __extension__ \ + ({ \ + poly8x16_t c_ = (c); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_p16(a, b, c, d) \ + __extension__ \ + ({ \ + poly16x8_t c_ = (c); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s8(a, b, c, d) \ + __extension__ \ + ({ \ + int8x16_t c_ = (c); \ + int8x16_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s64(a, b, c, d) \ + __extension__ \ + 
({ \ + int64x2_t c_ = (c); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u8(a, b, c, d) \ + __extension__ \ + ({ \ + uint8x16_t c_ = (c); \ + uint8x16_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u64(a, b, c, d) \ + __extension__ \ + ({ \ + uint64x2_t c_ = (c); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +/* vcvt_f16_f32 not supported */ + +/* vcvt_f32_f16 not supported */ + +/* vcvt_high_f16_f32 not supported */ + +/* vcvt_high_f32_f16 not supported */ + +static float32x2_t vdup_n_f32 (float32_t); + +#define vcvt_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("scvtf %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ucvtf %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_f64_s64(a, b) \ + __extension__ \ + ({ \ + int64_t a_ = (a); \ + float64_t result; \ + __asm__ ("scvtf %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_f64_u64(a, b) \ + __extension__ \ + ({ \ + uint64_t a_ = (a); \ + float64_t result; \ + __asm__ ("ucvtf %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_s64_f64(a, b) \ + __extension__ \ + ({ \ + float64_t a_ = (a); \ + int64_t result; \ + __asm__ ("fcvtzs %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_u64_f64(a, b) \ + __extension__ \ + ({ \ + float64_t a_ = (a); \ + uint64_t result; \ + __asm__ ("fcvtzu %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("scvtf %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + 
result; \ + }) + +#define vcvtq_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ucvtf %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f64_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("scvtf %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f64_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ucvtf %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_s64_f64(a, b) \ + __extension__ \ + ({ \ + float64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_u64_f64(a, b) \ + __extension__ \ + ({ \ + float64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32_t a_ = (a); \ + float32_t result; \ + __asm__ ("scvtf %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32_t a_ = (a); \ + float32_t result; \ + __asm__ ("ucvtf %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32_t a_ = (a); \ + int32_t result; \ + __asm__ ("fcvtzs %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32_t a_ = (a); \ + uint32_t result; \ + __asm__ ("fcvtzu %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvtx_f32_f64 (float64x2_t a) +{ + float32x2_t result; + __asm__ ("fcvtxn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) +{ + float32x4_t result; + __asm__ ("fcvtxn2 %0.4s,%1.2d" + : "=w"(result) + : "w" (b), "0"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvtxd_f32_f64 (float64_t a) +{ + float32_t result; + __asm__ ("fcvtxn %s0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vext_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define 
vext_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64x1_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64x1_t a_ = (a); \ + int64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64x1_t a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ 
+ poly8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8x16_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8x16_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + float32x2_t result; + __asm__ ("fmla %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + float32x4_t result; + __asm__ ("fmla %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + float64x2_t result; + __asm__ ("fmla %0.2d,%2.2d,%3.2d" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_n_f32 
(float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + __asm__ ("fmla %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + __asm__ ("fmla %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + __asm__ ("fmla %0.2d, %2.2d, %3.d[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + float32x2_t result; + __asm__ ("fmls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + float32x4_t result; + __asm__ ("fmls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + float64x2_t result; + __asm__ ("fmls %0.2d,%2.2d,%3.2d" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t a) +{ + float32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t a) +{ + float64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t a) +{ + poly8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t a) +{ + poly16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t a) +{ + int8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t a) +{ + int16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t a) +{ + int32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t a) +{ + int64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + 
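
A minimal usage sketch (editorial, not part of the patch itself), showing how two of the inline-asm intrinsics defined above compose: vabdl_u8 widens the per-byte absolute differences to 16-bit lanes and vaddlvq_u16 reduces the eight halfword lanes to a single 32-bit sum. The helper name sad8 is hypothetical and only illustrates the intended calling convention of these wrappers:

#include <arm_neon.h>
#include <stdint.h>

/* Sum of absolute differences over one 8-byte vector pair,
   built only from intrinsics defined earlier in this header.  */
static uint32_t
sad8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t d = vabdl_u8 (a, b);   /* |a[i] - b[i]|, widened to u16 lanes.  */
  return vaddlvq_u16 (d);           /* add the eight u16 lanes into one u32.  */
}
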
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t a) +{ + uint8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t a) +{ + uint16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t a) +{ + uint32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t a) +{ + uint64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("shsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("shsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("shsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uhsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uhsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uhsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("shsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("shsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("shsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uhsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return 
result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uhsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uhsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * a) +{ + float32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_dup_f64 (const float64_t * a) +{ + float64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * a) +{ + poly8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * a) +{ + poly16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * a) +{ + int8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * a) +{ + int16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * a) +{ + int32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * a) +{ + int64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * a) +{ + uint8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * a) +{ + uint16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * a) +{ + uint32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * a) +{ + uint64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +#define vld1_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + const float32_t 
* a_ = (a); \ + float32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + const poly16_t * a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + const int16_t * a_ = (a); \ + int16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + const uint32_t * a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t * a) +{ + float32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_f64 (const float64_t * a) +{ + float64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : 
"=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * a) +{ + poly8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * a) +{ + poly16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * a) +{ + int8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * a) +{ + int16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * a) +{ + int32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * a) +{ + int64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * a) +{ + uint8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t * a) +{ + uint16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * a) +{ + uint32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * a) +{ + uint64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +#define vld1q_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + const float32_t * a_ = (a); \ + float32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + const poly16_t * a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : 
/* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + const int16_t * a_ = (a); \ + int16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + const uint32_t * a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + float32x2_t t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +{ + int16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +{ + int32x2_t result; + __asm__ ("mla %0.2s,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +{ + uint16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +{ + uint32x2_t result; + __asm__ ("mla 
%0.2s,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlal_high_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + : 
"=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), 
"w"(c) + : /* No clobbers */); + return result; +} + +#define vmlal_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" + : 
"=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("smlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("umlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + float32x4_t t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + float64x2_t t1; + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + 
return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + float32x2_t t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +{ + int16x4_t result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +{ + int32x2_t result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +{ + uint16x4_t result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +{ + uint32x2_t result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_s16 (int16x4_t a, 
int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlsl_high_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + 
uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlsl_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + 
int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("smlsl %0.8h, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_s16 
(int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("smlsl %0.4s, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("smlsl %0.2d, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("umlsl %0.8h, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("umlsl %0.4s, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("umlsl %0.2d, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + float32x4_t t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + float64x2_t t1; + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +{ + int16x8_t result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +{ + int32x4_t result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +{ + uint16x8_t result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +{ + uint32x4_t result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + 
int16x8_t result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_high_s8 (int8x16_t a) +{ + int16x8_t result; + __asm__ ("sshll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_high_s16 (int16x8_t a) +{ + int32x4_t result; + __asm__ ("sshll2 %0.4s,%1.8h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_high_s32 (int32x4_t a) +{ + int64x2_t result; + __asm__ ("sshll2 %0.2d,%1.4s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_high_u8 (uint8x16_t a) +{ + uint16x8_t result; + __asm__ ("ushll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_high_u16 (uint16x8_t a) +{ + uint32x4_t result; + __asm__ ("ushll2 %0.4s,%1.8h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_high_u32 (uint32x4_t a) +{ + uint64x2_t result; + __asm__ ("ushll2 %0.2d,%1.4s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_s8 (int8x8_t a) +{ + int16x8_t result; + __asm__ ("sshll %0.8h,%1.8b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_s16 (int16x4_t a) +{ + int32x4_t result; + __asm__ ("sshll %0.4s,%1.4h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_s32 (int32x2_t a) +{ + int64x2_t result; + __asm__ ("sshll %0.2d,%1.2s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_u8 (uint8x8_t a) +{ + uint16x8_t result; + __asm__ ("ushll 
%0.8h,%1.8b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_u16 (uint16x4_t a) +{ + uint32x4_t result; + __asm__ ("ushll %0.4s,%1.4h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_u32 (uint32x2_t a) +{ + uint64x2_t result; + __asm__ ("ushll %0.2d,%1.2s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovn_high_s16 (int8x8_t a, int16x8_t b) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovn_high_s32 (int16x4_t a, int32x4_t b) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovn_high_s64 (int32x2_t a, int64x2_t b) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovn_high_u16 (uint8x8_t a, uint16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovn_high_u32 (uint16x4_t a, uint32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovn_high_u64 (uint32x2_t a, uint64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t a) +{ + int8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t a) +{ + int16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t a) +{ + int32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t a) +{ + uint8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t a) +{ + uint16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + 
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t a) +{ + uint32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t a, float32_t b) +{ + float32x2_t result; + __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t a, uint16_t b) +{ + uint16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t a, uint32_t b) +{ + uint32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmuld_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64_t result; \ + __asm__ ("fmul %d0,%d1,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t 
a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_s16 (int16x8_t a, int16_t b) +{ + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_s32 (int32x4_t a, int32_t b) +{ + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_u16 (uint16x8_t a, uint16_t b) +{ + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_u32 (uint32x4_t a, uint32_t b) +{ + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_high_p8 (poly8x16_t a, poly8x16_t b) +{ + poly16x8_t result; + __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_high_s8 (int8x16_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("smull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_s16 (int16x8_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_s32 (int32x4_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_high_u8 (uint8x16_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("umull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_u16 (uint16x8_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_u32 (uint32x4_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmull_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ 
+ : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t a, int16_t b) +{ + int32x4_t result; + __asm__ ("smull %0.4s,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t a, int32_t b) +{ + int64x2_t result; + __asm__ ("smull %0.2d,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t a, uint16_t b) +{ + uint32x4_t result; + __asm__ ("umull %0.4s,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t a, uint32_t b) +{ + uint64x2_t result; + __asm__ ("umull %0.2d,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t a, poly8x8_t b) +{ + poly16x8_t result; + __asm__ ("pmull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t a, int8x8_t b) +{ + int16x8_t result; + __asm__ ("smull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + 
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t a, int16x4_t b) +{ + int32x4_t result; + __asm__ ("smull %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t a, int32x2_t b) +{ + int64x2_t result; + __asm__ ("smull %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t a, uint8x8_t b) +{ + uint16x8_t result; + __asm__ ("umull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t a, uint16x4_t b) +{ + uint32x4_t result; + __asm__ ("umull %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t a, uint32x2_t b) +{ + uint64x2_t result; + __asm__ ("umull %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t a, float32_t b) +{ + float32x4_t result; + __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_n_f64 (float64x2_t a, float64_t b) +{ + float64x2_t result; + __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t a, uint16_t b) +{ + uint16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t a, uint32_t b) +{ + uint32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmuls_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32_t result; \ + __asm__ ("fmul %s0,%s1,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmulx %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmulx_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmulx 
%0.2s,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmulxd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("fmulx %d0, %d1, %d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmulx %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmulx %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmulxq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulxq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmulxs_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("fmulx %s0, %s1, %s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t a, int8x8_t b) +{ + int16x4_t result; + __asm__ ("sadalp %0.4h,%2.8b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t a, int16x4_t b) +{ + int32x2_t result; + __asm__ ("sadalp %0.2s,%2.4h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t a, int32x2_t b) +{ + int64x1_t result; + __asm__ ("sadalp %0.1d,%2.2s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t a, uint8x8_t b) +{ + uint16x4_t result; + __asm__ ("uadalp %0.4h,%2.8b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t a, uint16x4_t b) +{ + uint32x2_t result; + __asm__ ("uadalp %0.2s,%2.4h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t a, uint32x2_t b) +{ + uint64x1_t result; + __asm__ ("uadalp %0.1d,%2.2s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpadalq_s8 (int16x8_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("sadalp %0.8h,%2.16b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpadalq_s16 (int32x4_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("sadalp %0.4s,%2.8h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpadalq_s32 (int64x2_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ 
("sadalp %0.2d,%2.4s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpadalq_u8 (uint16x8_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("uadalp %0.8h,%2.16b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpadalq_u16 (uint32x4_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("uadalp %0.4s,%2.8h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpadalq_u32 (uint64x2_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("uadalp %0.2d,%2.4s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("faddp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_addpv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_addpv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_addpv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpaddd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("faddp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t a) +{ + int16x4_t result; + __asm__ ("saddlp %0.4h,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t a) +{ + int32x2_t result; + __asm__ ("saddlp %0.2s,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t a) +{ + int64x1_t result; + __asm__ ("saddlp %0.1d,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t a) +{ + uint16x4_t result; + __asm__ ("uaddlp %0.4h,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
+vpaddl_u16 (uint16x4_t a) +{ + uint32x2_t result; + __asm__ ("uaddlp %0.2s,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t a) +{ + uint64x1_t result; + __asm__ ("uaddlp %0.1d,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddlq_s8 (int8x16_t a) +{ + int16x8_t result; + __asm__ ("saddlp %0.8h,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t a) +{ + int32x4_t result; + __asm__ ("saddlp %0.4s,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t a) +{ + int64x2_t result; + __asm__ ("saddlp %0.2d,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t a) +{ + uint16x8_t result; + __asm__ ("uaddlp %0.8h,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t a) +{ + uint32x4_t result; + __asm__ ("uaddlp %0.4s,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t a) +{ + uint64x2_t result; + __asm__ ("uaddlp %0.2d,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpaddq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("faddp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpaddq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("faddp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpaddq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddq_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpaddq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : 
"=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddq_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpadds_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("faddp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("smaxp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmax_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("smaxp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("smaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("umaxp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("umaxp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("umaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmaxnm_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxnmq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxnmq_f64 
(float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxnmqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fmaxnmp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxnms_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fmaxnmp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmaxp %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpmaxq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("smaxp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpmaxq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("smaxp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpmaxq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("smaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpmaxq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("umaxp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpmaxq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("umaxp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpmaxq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("umaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fmaxp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxs_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fmaxp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) +vpmin_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("sminp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("sminp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("sminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uminp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uminp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpminnm_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fminnmp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminnmq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fminnmp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminnmq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fminnmp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminnmqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fminnmp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpminnms_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fminnmp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fminp %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpminq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("sminp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers 
*/); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpminq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("sminp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpminq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("sminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpminq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uminp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpminq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uminp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpminq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fminp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmins_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fminp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_s16 (int8x8_t a, int16x8_t b) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_s32 (int16x4_t a, int32x4_t b) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
+vqmovn_high_s64 (int32x2_t a, int64x2_t b) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovun_high_s16 (uint8x8_t a, int16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovun_high_s32 (uint16x4_t a, int32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovun_high_s64 (uint32x2_t a, int64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vqrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_s32(a, b, c) \ + 
__extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + 
: /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrbit_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrbit_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrbitq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrbitq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("urecpe %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("urecpe %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ 
("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t a) +{ + poly16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t a) +{ + poly16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers 
*/); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t a) +{ + float32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev64_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t a) +{ + poly16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t a) +{ + float32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t a) +{ + poly16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_s16(a, b) \ + __extension__ \ + ({ \ + int16x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + 
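/* A minimal usage sketch for the rounding-narrow pattern defined above,
   assuming an AArch64 target with this header included; the helper name
   narrow_pair and the shift count 4 are illustrative only, not part of the
   checked-in header.  The *_high_n forms pre-seed their destination with
   the low half (via vcombine_*) and let the "2"-suffixed instruction
   (here RSHRN2) fill the upper 64 bits.  */
static inline int8x16_t
narrow_pair (int16x8_t lo, int16x8_t hi)
{
  /* RSHRN: round, shift each 16-bit lane right by 4, keep the low byte.  */
  int8x8_t low_half = vrshrn_n_s16 (lo, 4);
  /* RSHRN2: the same narrowing of HI, written into the upper half.  */
  return vrshrn_high_n_s16 (low_half, hi, 4);
}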
+#define vrshrn_n_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u16(a, b) \ + __extension__ \ + ({ \ + uint16x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t a) +{ + float32x2_t result; + __asm__ ("frsqrte %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrsqrte_f64 (float64x1_t a) +{ + float64x1_t result; + __asm__ ("frsqrte %d0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("ursqrte %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrted_f64 (float64_t a) +{ + float64_t result; + __asm__ ("frsqrte %d0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t a) +{ + float32x4_t result; + __asm__ ("frsqrte %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrteq_f64 (float64x2_t a) +{ + float64x2_t result; + __asm__ ("frsqrte %0.2d,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("ursqrte %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtes_f32 (float32_t a) +{ + float32_t result; + __asm__ ("frsqrte %s0,%s1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrtsd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("frsqrts %d0,%d1,%d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No 
clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrtsq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtss_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("frsqrts %s0,%s1,%s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsrtsq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t a, int16x8_t b) +{ + int8x8_t result; + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsubhn_s32 (int32x4_t a, int32x4_t b) +{ + int16x4_t result; + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsubhn_s64 (int64x2_t a, int64x2_t b) +{ + int32x2_t result; + __asm__ 
("rsubhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsubhn_u16 (uint16x8_t a, uint16x8_t b) +{ + uint8x8_t result; + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsubhn_u32 (uint32x4_t a, uint32x4_t b) +{ + uint16x4_t result; + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsubhn_u64 (uint64x2_t a, uint64x2_t b) +{ + uint32x2_t result; + __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vset_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64_t a_ = (a); \ + int64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ 
+ uint32_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define 
vshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s16(a, b) \ + __extension__ \ + ({ \ + int16x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("shrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("shrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("shrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u16(a, b) \ + __extension__ \ + ({ \ + uint16x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("shrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("shrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("shrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsli_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sli %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsli_n_p16(a, b, c) \ + 
__extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sli %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsliq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sli %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsliq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sli %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsri_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sri %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsri_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sri %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsriq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sri %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsriq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sri %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vst1_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + 
: "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + + +#define vst1q_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsubhn_s16 (int16x8_t a, int16x8_t b) +{ + int8x8_t result; + __asm__ ("subhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsubhn_s32 (int32x4_t a, int32x4_t b) +{ + int16x4_t result; + __asm__ ("subhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsubhn_s64 (int64x2_t a, int64x2_t b) +{ + int32x2_t result; + __asm__ ("subhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsubhn_u16 (uint16x8_t a, uint16x8_t b) +{ + uint8x8_t result; + __asm__ ("subhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsubhn_u32 (uint32x4_t a, uint32x4_t b) +{ + uint16x4_t result; + __asm__ ("subhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsubhn_u64 (uint64x2_t a, uint64x2_t b) +{ + uint32x2_t result; + __asm__ ("subhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn1_p8 
(poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), 
"w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn2q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_p8 (poly8x8_t a, poly8x8_t 
b) +{ + uint8x8_t result; + __asm__ ("cmtst %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_p16 (poly16x4_t a, poly16x4_t b) +{ + uint16x4_t result; + __asm__ ("cmtst %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_p8 (poly8x16_t a, poly8x16_t b) +{ + uint8x16_t result; + __asm__ ("cmtst %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_p16 (poly16x8_t a, poly16x8_t b) +{ + uint16x8_t result; + __asm__ ("cmtst %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp1_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline 
float64x2_t __attribute__ ((__always_inline__)) +vuzp1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : 
"w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp2q_s64 (int64x2_t a, int64x2_t b) +{ + 
int64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip1_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t 
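+/* For illustration (not part of the original header): vuzp1_s8 (a, b)
+   gathers the even-numbered lanes {a0,a2,a4,a6,b0,b2,b4,b6} and
+   vuzp2_s8 (a, b) the odd-numbered lanes, de-interleaving the pair;
+   the zip1/zip2 group that follows performs the opposite interleave
+   of the low and high halves respectively.  */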
__attribute__ ((__always_inline__)) +vzip1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), 
"w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t 
result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip2q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +/* End of temporary inline asm implementations. */ + +/* Start of temporary inline asm for vldn, vstn and friends. */ + +/* Create struct element types for duplicating loads. + + Create 2 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | N | N | + +------+----+----+----+----+ + |uint | Y | Y | N | N | + +------+----+----+----+----+ + |float | - | - | N | N | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ + + Create 3 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | Y | Y | + +------+----+----+----+----+ + |uint | Y | Y | Y | Y | + +------+----+----+----+----+ + |float | - | - | Y | Y | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ + + Create 4 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | N | N | Y | + +------+----+----+----+----+ + |uint | Y | N | N | Y | + +------+----+----+----+----+ + |float | - | - | N | Y | + +------+----+----+----+----+ + |poly | Y | N | - | - | + +------+----+----+----+----+ + + This is required for casting memory reference. */ +#define __STRUCTN(t, sz, nelem) \ + typedef struct t ## sz ## x ## nelem ## _t { \ + t ## sz ## _t val[nelem]; \ + } t ## sz ## x ## nelem ## _t; + +/* 2-element structs. */ +__STRUCTN (int, 8, 2) +__STRUCTN (int, 16, 2) +__STRUCTN (uint, 8, 2) +__STRUCTN (uint, 16, 2) +__STRUCTN (poly, 8, 2) +__STRUCTN (poly, 16, 2) +/* 3-element structs. */ +__STRUCTN (int, 8, 3) +__STRUCTN (int, 16, 3) +__STRUCTN (int, 32, 3) +__STRUCTN (int, 64, 3) +__STRUCTN (uint, 8, 3) +__STRUCTN (uint, 16, 3) +__STRUCTN (uint, 32, 3) +__STRUCTN (uint, 64, 3) +__STRUCTN (float, 32, 3) +__STRUCTN (float, 64, 3) +__STRUCTN (poly, 8, 3) +__STRUCTN (poly, 16, 3) +/* 4-element structs. 
*/ +__STRUCTN (int, 8, 4) +__STRUCTN (int, 64, 4) +__STRUCTN (uint, 8, 4) +__STRUCTN (uint, 64, 4) +__STRUCTN (poly, 8, 4) +__STRUCTN (float, 64, 4) +#undef __STRUCTN + +#define __LD2R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17"); \ + return result; \ + } + +__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,) +__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,) +__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,) +__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,) +__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,) +__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,) +__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,) +__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,) +__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,) +__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,) +__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,) +__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,) +__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q) +__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q) +__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q) +__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q) +__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q) +__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q) +__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q) +__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q) +__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q) +__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q) +__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q) +__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q) + +#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix ", v17." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17"); \ + return result; \ + } + +__LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,) +__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) + +#define __LD3R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17", "v18"); \ + return result; \ + } + +__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,) +__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,) +__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,) +__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,) +__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,) +__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,) +__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,) +__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,) +__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,) +__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,) +__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,) +__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,) +__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q) +__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q) +__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q) +__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q) +__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q) +__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q) +__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q) +__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q) +__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q) +__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q) +__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q) +__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q) + +#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17", "v18"); \ + return result; \ + } + +__LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,) +__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) + +#define __LD4R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17", "v18", "v19"); \ + return result; \ + } + +__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,) +__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,) +__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,) +__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,) +__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,) +__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,) +__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,) +__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,) +__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,) +__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,) +__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,) +__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,) +__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q) +__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q) +__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q) +__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q) +__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q) +__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q) +__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q) +__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q) +__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q) +__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q) +__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q) +__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q) + +#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17", "v18", "v19"); \ + return result; \ + } + +__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,) +__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST2_LANE_STRUCTURE_##intype *__p = \ + (__ST2_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "st2 {v16." #lnsuffix ", v17." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17"); \ + } + +__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) +__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) + +#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST3_LANE_STRUCTURE_##intype *__p = \ + (__ST3_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "st3 {v16." #lnsuffix " - v18." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17", "v18"); \ + } + +__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) +__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) + +#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST4_LANE_STRUCTURE_##intype *__p = \ + (__ST4_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17", "v18", "v19"); \ + } + +__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) +__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddlv_s32 (int32x2_t a) +{ + int64_t result; + __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); + return result; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddlv_u32 (uint32x2_t a) +{ + uint64_t result; + __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddd_s64 (int64x2_t __a) +{ + return __builtin_aarch64_addpdi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); +} + +/* Table intrinsics. */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl1_p8 (poly8x16_t a, uint8x8_t b) +{ + poly8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl1_s8 (int8x16_t a, uint8x8_t b) +{ + int8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl1_u8 (uint8x16_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) +{ + poly8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_s8 (int8x16_t a, uint8x16_t b) +{ + int8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b, 
v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ 
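+/* For illustration (not part of the original header): the vqtbl*
+   intrinsics map onto the A64 TBL instruction, so vqtbl1_s8 (tab, idx)
+   returns tab[idx[i]] for each byte of idx and 0 wherever idx[i] falls
+   outside the 16-byte table; the 2-, 3- and 4-register variants first
+   copy the table into v16-v19 so the asm can name a fixed register
+   list.  */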
((__always_inline__)) +vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b, 
v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + 
:"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +/* V7 legacy table intrinsics. */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No 
clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +/* End of temporary inline asm. */ + +/* Start of optimal implementations in approved order. 
*/ + +/* vabs */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return __builtin_aarch64_absv2sf (__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return __builtin_aarch64_absv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return __builtin_aarch64_absv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vabs_s64 (int64x1_t __a) +{ + return __builtin_llabs (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_absv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabsq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_absv2df (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + +/* vadd */ + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vaddd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vaddd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a + __b; +} + +/* vaddv */ + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), + 0); +} + +__extension__ static __inline 
int16_t __attribute__ ((__always_inline__)) +vaddvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddvq_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), + 0); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddvq_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 ((uint64x2_t) + __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddv_f32 (float32x2_t __a) +{ + float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a); + return vget_lane_f32 (__t, 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddvq_f32 (float32x4_t __a) +{ + float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a); + return vgetq_lane_f32 (__t, 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vaddvq_f64 (float64x2_t __a) +{ + float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a); + return vgetq_lane_f64 (__t, 0); +} + +/* vbsl */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +{ + return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, 
uint8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +{ + return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO + +/* vaes */ + +static __inline uint8x16_t +vaeseq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); +} + +static __inline uint8x16_t +vaesdq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); +} + +static __inline uint8x16_t 
+vaesmcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); +} + +static __inline uint8x16_t +vaesimcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); +} + +#endif + +/* vcage */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcages_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) >= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) >= vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcaged_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcageq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) >= vabsq_f64 (__b); +} + +/* vcagt */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcagts_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) > vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) > vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcagtd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcagtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) > vabsq_f64 (__b); +} + +/* vcale */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) <= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) <= vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) <= vabsq_f64 (__b); +} + +/* vcalt */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) < vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) < vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) < vabsq_f64 (__b); +} + +/* vceq - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a == __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
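The vceq* comparisons defined above return per-lane masks (all-ones for true, zero for false) rather than booleans, which pairs naturally with the vbsl* bit-selects defined a little earlier in this header. A minimal usage sketch follows; the helper name and scenario are illustrative only and are not part of the patch.

#include <arm_neon.h>

/* Illustrative helper (not part of the patch): wherever a lane of __v
   compares equal to __key, take that lane from __subst, otherwise keep __v.
   vceqq_f32 yields 0xffffffff/0 per lane; vbslq_f32 takes bits from its
   second operand where the mask is set and from the third elsewhere.  */
static inline float32x4_t
replace_equal_lanes (float32x4_t __v, float32x4_t __key, float32x4_t __subst)
{
  uint32x4_t __mask = vceqq_f32 (__v, __key);
  return vbslq_f32 (__mask, __subst, __v);
}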
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vceq - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqs_f32 (float32_t __a, float32_t __b) +{ + return __a == __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqd_f64 (float64_t __a, float64_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +/* vceqz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_f64 (float64x1_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_s64 (int64x1_t __a) +{ + return __a == 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_u64 (uint64x1_t __a) +{ + return __a == 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vceqz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqzs_f32 (float32_t __a) +{ + return __a == 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_s64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_u64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqzd_f64 (float64_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +/* vcge - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a >= __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
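Because vcge*/vcgt* likewise return all-ones/zero lane masks, counting how many lanes satisfy a predicate reduces to a shift plus one of the vaddv* reductions defined above. A sketch, assuming the standard vdupq_n_s32 and vshrq_n_u32 intrinsics that live elsewhere in this same header (outside the excerpt shown here); again the helper itself is illustrative, not part of the patch.

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative helper (not part of the patch): count the lanes of __v that
   are >= __t.  Each lane of the vcgeq_s32 mask is 0 or 0xffffffff; shifting
   right by 31 leaves 0 or 1 per lane, and vaddvq_u32 sums the lanes.  */
static inline uint32_t
count_lanes_ge (int32x4_t __v, int32_t __t)
{
  uint32x4_t __mask = vcgeq_s32 (__v, vdupq_n_s32 (__t));
  return vaddvq_u32 (vshrq_n_u32 (__mask, 31));
}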
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcge - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcges_f32 (float32_t __a, float32_t __b) +{ + return __a >= __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcged_f64 (float64_t __a, float64_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +/* vcgez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_f64 (float64x1_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_s64 (int64x1_t __a) +{ + return __a >= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_u64 (uint64x1_t __a) +{ + return __a >= 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgezs_f32 (float32_t __a) +{ + return __a >= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_s64 (int64x1_t __a) +{ + return __a >= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_u64 (int64x1_t __a) +{ + return __a >= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgezd_f64 (float64_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +/* vcgt - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a > __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
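The vqtbl*/vqtbx* intrinsics at the top of this block index into one to four 16-byte table registers; TBL returns zero for an out-of-range index while TBX leaves the destination lane untouched. A small usage sketch, assuming the single-table form vqtbl1q_u8 from the same family (defined in this header just before the temporary inline-asm block excerpted here); the helper is illustrative only.

#include <arm_neon.h>

/* Illustrative helper (not part of the patch): translate the low nibble of
   each of 16 bytes into its ASCII hex digit with a single TBL lookup.  The
   vandq_u8 mask keeps every index inside the 0..15 range of the one-register
   table, so the TBL "out of range gives 0" rule never triggers.  */
static inline uint8x16_t
nibbles_to_hex (uint8x16_t __nibbles)
{
  static const uint8_t __digits[16]
    = { '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
  uint8x16_t __table = vld1q_u8 (__digits);
  return vqtbl1q_u8 (__table, vandq_u8 (__nibbles, vdupq_n_u8 (0x0f)));
}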
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgts_f32 (float32_t __a, float32_t __b) +{ + return __a > __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtd_f64 (float64_t __a, float64_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +/* vcgtz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_f64 (float64x1_t __a) +{ + return __a > 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_s64 (int64x1_t __a) +{ + return __a > 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_u64 (uint64x1_t __a) +{ + return __a > 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgtz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgtzs_f32 (float32_t __a) +{ + return __a > 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtzd_s64 (int64x1_t __a) +{ + return __a > 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtzd_u64 (int64x1_t __a) +{ + return __a > 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtzd_f64 (float64_t __a) +{ + return __a > 0.0 ? -1ll : 0ll; +} + +/* vcle - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a <= __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, + (int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, + (int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, + (int16x8_t) __a); +} + 
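The comparison intrinsics in this group all follow the same contract: each lane of the result is set to all ones where the predicate holds and to all zeros otherwise, and the vcle* forms are implemented by handing the swapped operands to the corresponding cmge builtin (the casts through the signed element types are only there until the builtins carry their proper unsigned signatures, as the TODO comments elsewhere in this header note). A minimal usage sketch under those assumptions follows; it is illustrative only, not part of the checked-in header, the helper name clamp_to_limit is hypothetical, and it relies solely on vcleq_u16 and vbslq_u16 as declared in arm_neon.h.

    #include <arm_neon.h>

    /* Per-lane clamp: where a[i] <= limit[i], keep a[i]; otherwise take limit[i].
       vcleq_u16 yields a 0xFFFF/0x0000 mask per lane; vbslq_u16 then selects
       bits from its second operand where the mask is set, else from the third.  */
    static inline uint16x8_t
    clamp_to_limit (uint16x8_t a, uint16x8_t limit)
    {
      uint16x8_t mask = vcleq_u16 (a, limit);
      return vbslq_u16 (mask, a, limit);
    }

The same mask-then-select pattern applies to the vcgt/vclt forms and the compare-with-zero variants defined in this file; only the intrinsic that produces the mask changes.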
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, + (int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, + (int64x2_t) __a); +} + +/* vcle - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcles_f32 (float32_t __a, float32_t __b) +{ + return __a <= __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcled_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcled_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcled_f64 (float64_t __a, float64_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +/* vclez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_f64 (float64x1_t __a) +{ + return __a <= 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_s64 (int64x1_t __a) +{ + return __a <= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_u64 (uint64x1_t __a) +{ + return __a <= 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); +} + +/* vclez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclezs_f32 (float32_t __a) +{ + return __a <= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclezd_s64 (int64x1_t __a) +{ + return __a <= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclezd_u64 (int64x1_t __a) +{ + return __a <= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vclezd_f64 (float64_t __a) +{ + return __a <= 0.0 ? -1ll : 0ll; +} + +/* vclt - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a < __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, + (int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, + (int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, + (int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, + (int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, + (int64x2_t) __a); +} + +/* vclt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclts_f32 (float32_t __a, float32_t __b) +{ + return __a < __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a < __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltd_f64 (float64_t __a, float64_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +/* vcltz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_f64 (float64x1_t __a) +{ + return __a < 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcltz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_s64 (int64x1_t __a) +{ + return __a < 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); +} + +/* vcltz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcltzs_f32 (float32_t __a) +{ + return __a < 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltzd_s64 (int64x1_t __a) +{ + return __a < 0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltzd_u64 (int64x1_t __a) +{ + return __a < 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltzd_f64 (float64_t __a) +{ + return __a < 0.0 ? -1ll : 0ll; +} + +/* vclz. */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return __builtin_aarch64_clzv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return __builtin_aarch64_clzv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return __builtin_aarch64_clzv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_clzv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_clzv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_clzv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); +} + +/* vcvt (double -> float). */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_f64 (float64x2_t __a) +{ + return __builtin_aarch64_float_truncate_lo_v2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); +} + +/* vcvt (float -> double). 
*/ + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_f64_f32 (float32x2_t __a) +{ + + return __builtin_aarch64_float_extend_lo_v2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_high_f64_f32 (float32x4_t __a) +{ + return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); +} + +/* vcvt (int -> float) */ + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_s64 (int64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_u64 (uint64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_s32 (int32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_u32 (uint32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return __builtin_aarch64_floatv2siv2sf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return __builtin_aarch64_floatv4siv4sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_s64 (int64x2_t __a) +{ + return __builtin_aarch64_floatv2div2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_u64 (uint64x2_t __a) +{ + return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); +} + +/* vcvt (float -> int) */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtd_s64_f64 (float64_t __a) +{ + return (int64_t) __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtd_u64_f64 (float64_t __a) +{ + return (uint64_t) __a; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvts_s32_f32 (float32_t __a) +{ + return (int32_t) __a; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvts_u32_f32 (float32_t __a) +{ + return (uint32_t) __a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lbtruncv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lbtruncv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lbtruncv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); +} + +/* vcvta */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtad_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lrounddfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtad_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lroundudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtas_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtas_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvta_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lroundv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvta_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtaq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lroundv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtaq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtaq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lroundv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtaq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); +} + +/* vcvtm */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtmd_s64_f64 (float64_t __a) +{ + return __builtin_llfloor (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtmd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfloorudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtms_s32_f32 (float32_t __a) +{ + return __builtin_ifloorf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtms_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfloorusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtm_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfloorv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtm_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtmq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfloorv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtmq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtmq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfloorv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtmq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a); +} + +/* vcvtn */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtnd_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintndfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtnd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintnudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtns_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtns_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtn_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfrintnv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtn_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtnq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfrintnv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtnq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtnq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfrintnv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtnq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a); +} + +/* vcvtp */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtpd_s64_f64 (float64_t __a) +{ + return __builtin_llceil (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtpd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lceiludfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtps_s32_f32 (float32_t __a) +{ + return __builtin_iceilf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtps_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lceilusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtp_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lceilv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtp_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtpq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lceilv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtpq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtpq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceilv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtpq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); +} + +/* vdup_n */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) +{ + return (float32x2_t) {__a, __a}; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_n_f64 (float64_t __a) +{ + return __a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) +{ + return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) +{ + return (poly16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t) {__a, __a}; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t) {__a, __a}; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return __a; +} + +/* vdupq_n */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_n_f64 (float64_t __a) +{ + return (float64x2_t) {__a, __a}; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (uint32_t __a) +{ + return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (uint32_t __a) +{ + return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int32_t __a) +{ + return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int32_t __a) +{ + return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + return (int32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t) {__a, __a}; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint32_t __a) +{ + return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static 
__inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint32_t __a) +{ + return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) +{ + return (uint64x2_t) {__a, __a}; +} + +/* vdup_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_s32 (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_u64 (__a, __b); +} + +/* vdup_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s8 (__a, __b); +} + +__extension__ static __inline 
int16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s32 (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u64 (__a, __b); +} + +/* vdupq_lane */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u64 (__a, __b); +} + +/* vdupq_laneq */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_f32 (float32x4_t __a, const int __b) +{ + return 
__aarch64_vdupq_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u64 (__a, __b); +} + +/* vdupb_lane */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_u8 (__a, __b); +} + +/* vduph_lane */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_u16 (__a, __b); +} + +/* vdups_lane */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ 
((__always_inline__)) +vdups_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_u32 (__a, __b); +} + +/* vdupd_lane */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +/* vdupb_laneq */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) +{ + return __aarch64_vgetq_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u8 (__a, __b); +} + +/* vduph_laneq */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u16 (__a, __b); +} + +/* vdups_laneq */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vdups_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u32 (__a, __b); +} + +/* vdupd_laneq */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f64 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u64 (__a, __b); +} + +/* vfma_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfma_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (__b, __c, __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmad_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return 
__builtin_fma (__b, __c, __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmas_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a); +} + +/* vfma_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfma_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmad_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmas_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a); +} + +/* vfmaq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); +} + +/* vfmaq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vfms_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (-__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfms_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (-__b, __c, __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmsd_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (-__b, __c, __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmss_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a); +} + +/* vfms_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf 
(-__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfms_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmsd_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmss_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a); +} + +/* vfmsq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); +} + +/* vfmsq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vld1 */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_f64 (const float64_t *a) +{ + return *a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t *a) +{ + return (poly8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t *a) +{ + return (poly16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t *a) +{ + return *a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t *a) +{ + return (uint8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static 
__inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t *a) +{ + return (uint16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t *a) +{ + return (uint32x2_t) + __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t *a) +{ + return *a; +} + +/* vld1q */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_f64 (const float64_t *a) +{ + return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t *a) +{ + return (poly8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t *a) +{ + return (poly16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t *a) +{ + return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t *a) +{ + return (uint8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t *a) +{ + return (uint16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t *a) +{ + return (uint32x4_t) + __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_u64 (const uint64_t *a) +{ + return (uint64x2_t) + __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + +/* vldn */ + +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_s64 (const int64_t * __a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_u64 (const uint64_t * __a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const 
__builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__)) +vld2_f64 (const float64_t * __a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1); + return ret; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_s8 (const int8_t * __a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_p8 (const poly8_t * __a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_s16 (const int16_t * __a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_p16 (const poly16_t * __a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_s32 (const int32_t * __a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_u8 (const uint8_t * __a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_u16 (const uint16_t * __a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_u32 (const uint32_t * __a) +{ + uint32x2x2_t ret; + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_f32 (const float32_t * __a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vld2q_s8 (const int8_t * __a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vld2q_p8 (const poly8_t * __a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_s16 (const int16_t * __a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_p16 (const poly16_t * __a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_s32 (const int32_t * __a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) +vld2q_s64 (const int64_t * __a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vld2q_u8 (const uint8_t * __a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline 
uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_u16 (const uint16_t * __a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_u32 (const uint32_t * __a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) +vld2q_u64 (const uint64_t * __a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_f32 (const float32_t * __a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) +vld2q_f64 (const float64_t * __a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_s64 (const int64_t * __a) +{ + int64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_u64 (const uint64_t * __a) +{ + uint64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__)) +vld3_f64 (const float64_t * __a) +{ + float64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1); + ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2); + return ret; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_s8 (const int8_t * __a) +{ + int8x8x3_t 
ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_p8 (const poly8_t * __a) +{ + poly8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_s16 (const int16_t * __a) +{ + int16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_p16 (const poly16_t * __a) +{ + poly16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_s32 (const int32_t * __a) +{ + int32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_u8 (const uint8_t * __a) +{ + uint8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_u16 (const uint16_t * __a) +{ + uint16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_u32 (const uint32_t * __a) +{ + uint32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (uint32x2_t) 
__builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_f32 (const float32_t * __a) +{ + float32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); + return ret; +} + +__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) +vld3q_s8 (const int8_t * __a) +{ + int8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) +vld3q_p8 (const poly8_t * __a) +{ + poly8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_s16 (const int16_t * __a) +{ + int16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_p16 (const poly16_t * __a) +{ + poly16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_s32 (const int32_t * __a) +{ + int32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) +vld3q_s64 (const int64_t * __a) +{ + int64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) 
+vld3q_u8 (const uint8_t * __a) +{ + uint8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_u16 (const uint16_t * __a) +{ + uint16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_u32 (const uint32_t * __a) +{ + uint32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) +vld3q_u64 (const uint64_t * __a) +{ + uint64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_f32 (const float32_t * __a) +{ + float32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); + return ret; +} + +__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) +vld3q_f64 (const float64_t * __a) +{ + float64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); + return ret; +} + +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_s64 (const int64_t * __a) +{ + int64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_u64 (const uint64_t * __a) +{ + uint64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const 
__builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__)) +vld4_f64 (const float64_t * __a) +{ + float64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1); + ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2); + ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3); + return ret; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_s8 (const int8_t * __a) +{ + int8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_p8 (const poly8_t * __a) +{ + poly8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_s16 (const int16_t * __a) +{ + int16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_p16 (const poly16_t * __a) +{ + poly16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_s32 (const int32_t * __a) +{ + int32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si 
(__o, 3); + return ret; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_u8 (const uint8_t * __a) +{ + uint8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_u16 (const uint16_t * __a) +{ + uint16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_u32 (const uint32_t * __a) +{ + uint32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_f32 (const float32_t * __a) +{ + float32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); + ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); + return ret; +} + +__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) +vld4q_s8 (const int8_t * __a) +{ + int8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) +vld4q_p8 (const poly8_t * __a) +{ + poly8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_s16 (const int16_t * __a) +{ + int16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const 
__builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_p16 (const poly16_t * __a) +{ + poly16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_s32 (const int32_t * __a) +{ + int32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) +vld4q_s64 (const int64_t * __a) +{ + int64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) +vld4q_u8 (const uint8_t * __a) +{ + uint8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_u16 (const uint16_t * __a) +{ + uint16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_u32 (const uint32_t * __a) +{ + uint32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = 
(uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) +vld4q_u64 (const uint64_t * __a) +{ + uint64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_f32 (const float32_t * __a) +{ + float32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); + return ret; +} + +__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) +vld4q_f64 (const float64_t * __a) +{ + float64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); + return ret; +} + +/* vmax */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smax_nanv2sf (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_smaxv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_smaxv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_smaxv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smax_nanv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmaxq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smax_nanv2df (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmaxq_s8 
(int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_smaxv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_smaxv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_smaxv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +/* vmaxnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmaxnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smaxv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smaxv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smaxv2df (__a, __b); +} + +/* vmaxv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), + 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 
(__builtin_aarch64_reduc_smax_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), + 0); +} + +/* vmaxnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0); +} + +/* vmin */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smin_nanv2sf (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sminv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sminv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sminv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smin_nanv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vminq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smin_nanv2df (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vminq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sminv16qi (__a, __b); +} + +__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) +vminq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sminv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vminq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sminv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vminq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +/* vminnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vminnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_sminv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_sminv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vminnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_sminv2df (__a, __b); +} + +/* vminv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), + 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), + 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminvq_s16 
(int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), + 0); +} + +/* vminnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0); +} + +/* vmla */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + return a + b * c; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + return a + b * c; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + return a + b * c; +} + +/* vmla_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + +/* vmla_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_laneq_s16 (int16x4_t __a, int16x4_t 
__b, + int16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, + int32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmlaq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + + /* vmlaq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmls */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + return a - b * c; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + return a - b * c; +} + 
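/* [Editorial aside, not part of the GCC 4.9 patch above.]  A minimal usage
   sketch of the multiply-accumulate/subtract intrinsics defined in this
   header (vmlaq_f32, vmlsq_f32, vdupq_n_f32); the function axpy_axmy and its
   parameter names are hypothetical, chosen for illustration only.  */

#include <arm_neon.h>

/* Computes acc + x * s - y * t on four float lanes at once.  */
static inline float32x4_t
axpy_axmy (float32x4_t acc, float32x4_t x, float32x4_t y,
           float32_t s, float32_t t)
{
  /* acc + x * s : vmlaq_f32 (a, b, c) expands to a + b * c above.  */
  float32x4_t r = vmlaq_f32 (acc, x, vdupq_n_f32 (s));
  /* r - y * t : vmlsq_f32 (a, b, c) expands to a - b * c above.  */
  return vmlsq_f32 (r, y, vdupq_n_f32 (t));
}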
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + return a - b * c; +} + +/* vmls_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + +/* vmls_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, + int16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, + int32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmlsq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + + /* vmlsq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmov_n_ */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmov_n_f32 (float32_t __a) +{ + return vdup_n_f32 (__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmov_n_f64 (float64_t __a) +{ + return __a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (poly8_t __a) +{ + return vdup_n_p8 (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (poly16_t __a) +{ + return vdup_n_p16 (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmov_n_s8 (int8_t __a) +{ + return vdup_n_s8 (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmov_n_s16 (int16_t __a) +{ + return vdup_n_s16 (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmov_n_s32 (int32_t __a) +{ + return vdup_n_s32 (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t __a) +{ + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmov_n_u8 (uint8_t __a) +{ + return vdup_n_u8 (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmov_n_u16 (uint16_t __a) +{ + return vdup_n_u16 (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmov_n_u32 (uint32_t __a) +{ + return vdup_n_u32 (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t __a) +{ + return __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t __a) +{ + return vdupq_n_f32 (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmovq_n_f64 (float64_t __a) +{ + return vdupq_n_f64 (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (poly8_t __a) +{ + return vdupq_n_p8 (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 
(poly16_t __a) +{ + return vdupq_n_p16 (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int8_t __a) +{ + return vdupq_n_s8 (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovq_n_s16 (int16_t __a) +{ + return vdupq_n_s16 (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovq_n_s32 (int32_t __a) +{ + return vdupq_n_s32 (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovq_n_s64 (int64_t __a) +{ + return vdupq_n_s64 (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint8_t __a) +{ + return vdupq_n_u8 (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovq_n_u16 (uint16_t __a) +{ + return vdupq_n_u16 (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovq_n_u32 (uint32_t __a) +{ + return vdupq_n_u32 (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovq_n_u64 (uint64_t __a) +{ + return vdupq_n_u64 (__a); +} + +/* vmul_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) +{ + return __a * __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u32 (__b, __lane); +} + +/* vmul_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f64 (__b, __lane); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_u32 
(uint32x2_t __a, uint32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u32 (__b, __lane); +} + +/* vmulq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) +{ + return __a * __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u32 (__b, __lane); +} + +/* vmulq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f64 (__b, __lane); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u32 (__b, __lane); +} + +/* vneg */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vneg_f64 (float64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vneg_s64 (int64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline 
float64x2_t __attribute__ ((__always_inline__)) +vnegq_f64 (float64x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vnegq_s64 (int64x2_t __a) +{ + return -__a; +} + +/* vqabs */ + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqabsq_s64 (int64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqabsb_s8 (int8x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqabsqi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqabsh_s16 (int16x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqabshi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqabss_s32 (int32x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqabssi (__a); +} + +/* vqadd */ + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqaddb_s8 (int8x1_t __a, int8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqaddh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqadds_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqaddd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqadds_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); +} + +/* vqdmlal */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
+vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +{ + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +{ + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +{ + return __builtin_aarch64_sqdmlalhi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +{ + return __builtin_aarch64_sqdmlalsi 
(__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); +} + +/* vqdmlsl */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +{ + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +{ + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); +} + +__extension__ static __inline int64x2_t 
__attribute__ ((__always_inline__)) +vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +{ + return __builtin_aarch64_sqdmlslhi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +{ + return __builtin_aarch64_sqdmlslsi (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); +} + +/* vqdmulh */ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); +} + +/* vqdmull */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqdmullv4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqdmull2v8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); +} + +__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) +vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) +{ + int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return __builtin_aarch64_sqdmull_nv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqdmullv2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqdmull2v4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_aarch64_sqdmull2_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) +{ + int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return __builtin_aarch64_sqdmull_nv2si (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmullh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmulls_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); +} + +/* vqmovn */ + +__extension__ 
static __inline int8x8_t __attribute__ ((__always_inline__)) +vqmovn_s16 (int16x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqmovn_s32 (int32x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqmovn_s64 (int64x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqmovnh_s16 (int16x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqmovnhi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqmovns_s32 (int32x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqmovnsi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqmovnd_s64 (int64x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqmovndi (__a); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqmovnh_u16 (uint16x1_t __a) +{ + return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqmovns_u32 (uint32x1_t __a) +{ + return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqmovnd_u64 (uint64x1_t __a) +{ + return (uint32x1_t) __builtin_aarch64_uqmovndi (__a); +} + +/* vqmovun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovun_s16 (int16x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovun_s32 (int32x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovun_s64 (int64x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqmovunh_s16 (int16x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqmovunhi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqmovuns_s32 (int32x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqmovunsi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqmovund_s64 (int64x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqmovundi (__a); +} + +/* vqneg */ + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqnegq_s64 (int64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqnegb_s8 (int8x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqnegqi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqnegh_s16 
(int16x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqneghi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqnegs_s32 (int32x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqnegsi (__a); +} + +/* vqrdmulh */ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); +} + +/* vqrshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sqrshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqrshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqrshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqrshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqrshlq_s8 (int8x16_t 
__a, int8x16_t __b) +{ + return __builtin_aarch64_sqrshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqrshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqrshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __builtin_aarch64_sqrshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshlb_s8 (int8x1_t __a, int8x1_t __b) +{ + return __builtin_aarch64_sqrshlqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshlh_s16 (int16x1_t __a, int16x1_t __b) +{ + return __builtin_aarch64_sqrshlhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshls_s32 (int32x1_t __a, int32x1_t __b) +{ + return __builtin_aarch64_sqrshlsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqrshldi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); +} + +/* vqrshrn */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshrnh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshrns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshrnd_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqrshrnh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqrshrns_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqrshrnd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); +} + +/* vqrshrun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshrunh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshruns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshrund_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); +} + +/* vqshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sqshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t 
__attribute__ ((__always_inline__)) +vqshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sqshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __builtin_aarch64_sqshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlb_s8 (int8x1_t __a, int8x1_t __b) +{ + return __builtin_aarch64_sqshlqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshlh_s16 (int16x1_t __a, int16x1_t __b) +{ + return __builtin_aarch64_sqshlhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshls_s32 (int32x1_t __a, int32x1_t __b) +{ + return __builtin_aarch64_sqshlsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqshldi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); +} + +__extension__ 
static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshls_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); +} + 
+__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlb_n_s8 (int8x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshlh_n_s16 (int16x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshls_n_s32 (int32x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshld_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshlb_n_u8 (uint8x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshlh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshls_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshld_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); +} + +/* vqshlu */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshlu_n_s8 (int8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshlu_n_s16 (int16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshlu_n_s32 (int32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshlu_n_s64 (int64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshluq_n_s8 (int8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshluq_n_s16 (int16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshluq_n_s32 (int32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshluq_n_s64 (int64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlub_n_s8 (int8x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshluh_n_s16 (int16x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ 
((__always_inline__)) +vqshlus_n_s32 (int32x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshlud_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); +} + +/* vqshrn */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshrnh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshrns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshrnd_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshrnh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshrns_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshrnd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); +} + +/* vqshrun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshrunh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b); +} + +__extension__ static __inline 
int16x1_t __attribute__ ((__always_inline__)) +vqshruns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshrund_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b); +} + +/* vqsub */ + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqsubb_s8 (int8x1_t __a, int8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqsubh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqsubs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsubd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); +} + +/* vrecpe */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpes_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpesf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecped_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpedf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frecpev2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frecpev4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpeq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frecpev2df (__a); +} + +/* vrecps */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpss_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_frecpssf (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpsd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_frecpsdf (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_frecpsv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_frecpsv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpsq_f64 (float64x2_t __a, float64x2_t 
__b) +{ + return __builtin_aarch64_frecpsv2df (__a, __b); +} + +/* vrecpx */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpxs_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpxsf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpxd_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpxdf (__a); +} + +/* vrnd */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return __builtin_aarch64_btruncv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_btruncv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_btruncv2df (__a); +} + +/* vrnda */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return __builtin_aarch64_roundv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndaq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_roundv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndaq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_roundv2df (__a); +} + +/* vrndi */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndi_f32 (float32x2_t __a) +{ + return __builtin_aarch64_nearbyintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndiq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_nearbyintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndiq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_nearbyintv2df (__a); +} + +/* vrndm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return __builtin_aarch64_floorv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndmq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_floorv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndmq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_floorv2df (__a); +} + +/* vrndn */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frintnv2sf (__a); +} +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndnq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frintnv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndnq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frintnv2df (__a); +} + +/* vrndp */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return __builtin_aarch64_ceilv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndpq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_ceilv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndpq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_ceilv2df (__a); +} + +/* vrndx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndx_f32 (float32x2_t __a) +{ + return __builtin_aarch64_rintv2sf (__a); 
+} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndxq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_rintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndxq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_rintv2df (__a); +} + +/* vrshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshld_s64 
(int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); +} + +/* vrshr */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ 
((__always_inline__)) +vrshrd_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshrd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); +} + +/* vrsra */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO + +/* vsha1 */ + +static __inline uint32x4_t +vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); +} + +static __inline uint32_t +vsha1h_u32 (uint32_t hash_e) +{ + return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); +} + +static __inline uint32x4_t +vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) +{ + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); +} + +static __inline uint32x4_t +vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); +} + +static __inline uint32x4_t +vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); +} + +static __inline uint32x4_t +vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); +} + +static __inline uint32x4_t +vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) +{ + return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); +} + +static __inline uint32x4_t +vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); +} + +static __inline poly128_t +vmull_p64 (poly64_t a, poly64_t b) +{ + return + __builtin_aarch64_crypto_pmulldi_ppp (a, b); +} + +static __inline poly128_t +vmull_high_p64 (poly64x2_t a, poly64x2_t b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +} + +#endif + +/* vshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_n_s64 (int64x1_t 
__a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshld_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshld_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) 
__builtin_aarch64_sshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_high_n_s8 (int8x16_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_high_n_s16 (int16x8_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_high_n_s32 (int32x4_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_high_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ 
static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_high_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_high_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_n_s8 (int8x8_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_n_s16 (int16x4_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_n_s32 (int32x2_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); +} + +/* vshr */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshr_n_u64 (uint64x1_t __a, const int __b) +{ + return __builtin_aarch64_lshr_simddi_uus ( __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshrd_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vshrd_n_u64 (uint64_t __a, const int __b) +{ + return __builtin_aarch64_lshr_simddi_uus (__a, __b); +} + +/* vsli */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); +} + +__extension__ static __inline 
int16x8_t __attribute__ ((__always_inline__)) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); +} + +/* vsqadd */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsqadd_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsqadd_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsqadd_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsqadd_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) +vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vsqadds_u32 (uint32x1_t __a, int32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); +} + +/* vsqrt */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsqrt_f32 (float32x2_t a) +{ + return __builtin_aarch64_sqrtv2sf (a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsqrtq_f32 (float32x4_t a) +{ + return __builtin_aarch64_sqrtv4sf (a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vsqrtq_f64 (float64x2_t a) +{ + return __builtin_aarch64_sqrtv2df (a); +} + +/* vsra */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); +} + +/* vsri */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline 
uint64x1_t __attribute__ ((__always_inline__)) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); +} + +/* vst1 */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t *a, float32x2_t b) +{ + __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f64 (float64_t *a, float64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t *a, poly8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t *a, poly16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t *a, int8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t *a, int16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline 
void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t *a, int32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t *a, int64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t *a, uint8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t *a, uint16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t *a, uint32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, + (int32x2_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t *a, uint64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t *a, float32x4_t b) +{ + __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f64 (float64_t *a, float64x2_t b) +{ + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); +} + +/* vst1q */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t *a, poly8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t *a, poly16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t *a, int8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t *a, int16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t *a, int32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t *a, int64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t *a, uint8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t *a, uint16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t *a, uint32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, + (int32x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t *a, uint64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, + (int64x2_t) b); +} + +/* vstn */ + +__extension__ static __inline void +vst2_s64 (int64_t * __a, int64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int64x2x2_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = 
vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst2_u64 (uint64_t * __a, uint64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint64x2x2_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst2_f64 (float64_t * __a, float64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + float64x2x2_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); + __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst2_s8 (int8_t * __a, int8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p8 (poly8_t * __a, poly8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s16 (int16_t * __a, int16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int16x8x2_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p16 (poly16_t * __a, poly16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + poly16x8x2_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + 
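The vst2_* wrappers above widen each 64-bit input vector to a Q register (padding the upper half with zeroes via vcombine/vcreate), pack the pair into a __builtin_aarch64_simd_oi tuple, and hand it to the corresponding st2 builtin, which writes the elements to memory in interleaved order. A minimal usage sketch follows, assuming an AArch64 target with this header available; the function name, parameters, and buffer are illustrative and not part of arm_neon.h:

#include <arm_neon.h>

/* Store two int16x4_t planes interleaved with vst2_s16:
   out = { even[0], odd[0], even[1], odd[1], ... }.  */
void
store_interleaved_s16 (int16_t out[8], int16x4_t even, int16x4_t odd)
{
  int16x4x2_t pair;
  pair.val[0] = even;
  pair.val[1] = odd;
  vst2_s16 (out, pair);
}

The same pattern applies to the other vst2/vst3 variants in this section; only the element type of the x2/x3 structure and the destination pointer change.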
+__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s32 (int32_t * __a, int32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int32x4x2_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u8 (uint8_t * __a, uint8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u16 (uint16_t * __a, uint16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint16x8x2_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u32 (uint32_t * __a, uint32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint32x4x2_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f32 (float32_t * __a, float32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + float32x4x2_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s8 (int8_t * __a, int8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p8 (poly8_t * __a, poly8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) 
__a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s16 (int16_t * __a, int16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p16 (poly16_t * __a, poly16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s32 (int32_t * __a, int32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s64 (int64_t * __a, int64x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u8 (uint8_t * __a, uint8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u16 (uint16_t * __a, uint16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u32 (uint32_t * __a, uint32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u64 (uint64_t * __a, uint64x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f32 (float32_t * __a, float32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f64 (float64_t * __a, float64x2x2_t val) +{ + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst3_s64 (int64_t * __a, int64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int64x2x3_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst3_u64 (uint64_t * __a, uint64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint64x2x3_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst3_f64 (float64_t * __a, float64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + float64x2x3_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); + __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst3_s8 (int8_t * __a, int8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int8x16x3_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p8 (poly8_t * __a, poly8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + poly8x16x3_t temp; + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s16 (int16_t * __a, int16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int16x8x3_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p16 (poly16_t * __a, poly16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + poly16x8x3_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s32 (int32_t * __a, int32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int32x4x3_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u8 (uint8_t * __a, uint8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint8x16x3_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u16 (uint16_t * __a, uint16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint16x8x3_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, 
(int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u32 (uint32_t * __a, uint32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint32x4x3_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f32 (float32_t * __a, float32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + float32x4x3_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s8 (int8_t * __a, int8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p8 (poly8_t * __a, poly8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s16 (int16_t * __a, int16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p16 (poly16_t * __a, poly16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s32 (int32_t * __a, 
int32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s64 (int64_t * __a, int64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u8 (uint8_t * __a, uint8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u16 (uint16_t * __a, uint16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u32 (uint32_t * __a, uint32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u64 (uint64_t * __a, uint64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f32 (float32_t * __a, float32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f64 (float64_t * __a, float64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + 
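As an aside for the reader (this sketch is not part of the patch): the vst2/vst3/vst4 families defined above are normally paired with the matching vldN loads — the caller de-interleaves with vldN, works on the .val[] registers, and re-interleaves with vstN, which the compiler maps to a single ST2/ST3/ST4 structure store. The function name and buffer layout below are illustrative assumptions.

#include <arm_neon.h>

/* Halve the red channel of 16 interleaved RGB pixels (48 bytes).  */
void
halve_red_rgb24 (uint8_t *rgb)
{
  uint8x16x3_t pix = vld3q_u8 (rgb);        /* LD3: de-interleave into R, G, B planes.  */
  pix.val[0] = vshrq_n_u8 (pix.val[0], 1);  /* Halve the red plane.  */
  vst3q_u8 (rgb, pix);                      /* ST3: re-interleave and store, using vst3q_u8 as defined above.  */
}
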
+__extension__ static __inline void +vst4_s64 (int64_t * __a, int64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst4_u64 (uint64_t * __a, uint64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst4_f64 (float64_t * __a, float64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); + __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst4_s8 (int8_t * __a, int8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p8 (poly8_t * __a, poly8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t temp; + temp.val[0] = vcombine_p8 
(val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s16 (int16_t * __a, int16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p16 (poly16_t * __a, poly16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s32 (int32_t * __a, int32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u8 (uint8_t * __a, uint8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = 
vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u16 (uint16_t * __a, uint16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u32 (uint32_t * __a, uint32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f32 (float32_t * __a, float32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s8 (int8_t * __a, int8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o 
= __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p8 (poly8_t * __a, poly8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s16 (int16_t * __a, int16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p16 (poly16_t * __a, poly16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s32 (int32_t * __a, int32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s64 (int64_t * __a, int64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u8 (uint8_t * __a, uint8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u16 (uint16_t * __a, uint16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + 
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u32 (uint32_t * __a, uint32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u64 (uint64_t * __a, uint64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f32 (float32_t * __a, float32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f64 (float64_t * __a, float64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +/* vsub */ + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsubd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsubd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a - __b; +} + +/* vtbx1 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (8)); + int8x8_t __tbl = vtbl1_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t 
__idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtbx3 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (24)); + int8x8_t __tbl = vtbl3_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtrn */ + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t a, float32x2_t b) +{ + return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t a, poly8x8_t b) +{ + return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t a, poly16x4_t b) +{ + return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t a, int8x8_t b) +{ + return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t a, int16x4_t b) +{ + return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t a, int32x2_t b) +{ + return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t a, uint8x8_t b) +{ + return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t a, uint16x4_t b) +{ + return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t a, uint32x2_t b) +{ + return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t a, float32x4_t b) +{ + return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t a, poly8x16_t b) +{ + return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t a, poly16x8_t b) +{ + return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t a, int8x16_t b) +{ + return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; +} + +__extension__ static __inline int16x8x2_t __attribute__ 
((__always_inline__)) +vtrnq_s16 (int16x8_t a, int16x8_t b) +{ + return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t a, int32x4_t b) +{ + return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t a, uint8x16_t b) +{ + return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t a, uint16x8_t b) +{ + return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t a, uint32x4_t b) +{ + return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; +} + +/* vtst */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_s64 (int64x1_t __a, int64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtstq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtstq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? 
-1ll : 0ll; +} + +/* vuqadd */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuqadd_s8 (int8x8_t __a, uint8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuqadd_s16 (int16x4_t __a, uint16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuqadd_s32 (int32x2_t __a, uint32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqadd_s64 (int64x1_t __a, uint64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +} + +#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ + { \ + return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ + v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ + } + +#define __INTERLEAVE_LIST(op) \ + __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ + __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ + __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ + __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ + __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ + __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ + __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ + __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ + __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ + __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ + __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ + __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ + __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ + 
__DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ + __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ + __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ + __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ + __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) + +/* vuzp */ + +__INTERLEAVE_LIST (uzp) + +/* vzip */ + +__INTERLEAVE_LIST (zip) + +#undef __INTERLEAVE_LIST +#undef __DEFINTERLEAVE + +/* End of optimal implementations in approved order. */ + +#undef __aarch64_vget_lane_any +#undef __aarch64_vget_lane_f32 +#undef __aarch64_vget_lane_f64 +#undef __aarch64_vget_lane_p8 +#undef __aarch64_vget_lane_p16 +#undef __aarch64_vget_lane_s8 +#undef __aarch64_vget_lane_s16 +#undef __aarch64_vget_lane_s32 +#undef __aarch64_vget_lane_s64 +#undef __aarch64_vget_lane_u8 +#undef __aarch64_vget_lane_u16 +#undef __aarch64_vget_lane_u32 +#undef __aarch64_vget_lane_u64 + +#undef __aarch64_vgetq_lane_f32 +#undef __aarch64_vgetq_lane_f64 +#undef __aarch64_vgetq_lane_p8 +#undef __aarch64_vgetq_lane_p16 +#undef __aarch64_vgetq_lane_s8 +#undef __aarch64_vgetq_lane_s16 +#undef __aarch64_vgetq_lane_s32 +#undef __aarch64_vgetq_lane_s64 +#undef __aarch64_vgetq_lane_u8 +#undef __aarch64_vgetq_lane_u16 +#undef __aarch64_vgetq_lane_u32 +#undef __aarch64_vgetq_lane_u64 + +#undef __aarch64_vdup_lane_any +#undef __aarch64_vdup_lane_f32 +#undef __aarch64_vdup_lane_f64 +#undef __aarch64_vdup_lane_p8 +#undef __aarch64_vdup_lane_p16 +#undef __aarch64_vdup_lane_s8 +#undef __aarch64_vdup_lane_s16 +#undef __aarch64_vdup_lane_s32 +#undef __aarch64_vdup_lane_s64 +#undef __aarch64_vdup_lane_u8 +#undef __aarch64_vdup_lane_u16 +#undef __aarch64_vdup_lane_u32 +#undef __aarch64_vdup_lane_u64 +#undef __aarch64_vdup_laneq_f32 +#undef __aarch64_vdup_laneq_f64 +#undef __aarch64_vdup_laneq_p8 +#undef __aarch64_vdup_laneq_p16 +#undef __aarch64_vdup_laneq_s8 +#undef __aarch64_vdup_laneq_s16 +#undef __aarch64_vdup_laneq_s32 +#undef __aarch64_vdup_laneq_s64 +#undef __aarch64_vdup_laneq_u8 +#undef __aarch64_vdup_laneq_u16 +#undef __aarch64_vdup_laneq_u32 +#undef __aarch64_vdup_laneq_u64 +#undef __aarch64_vdupq_lane_f32 +#undef __aarch64_vdupq_lane_f64 +#undef __aarch64_vdupq_lane_p8 +#undef __aarch64_vdupq_lane_p16 +#undef __aarch64_vdupq_lane_s8 +#undef __aarch64_vdupq_lane_s16 +#undef __aarch64_vdupq_lane_s32 +#undef __aarch64_vdupq_lane_s64 +#undef __aarch64_vdupq_lane_u8 +#undef __aarch64_vdupq_lane_u16 +#undef __aarch64_vdupq_lane_u32 +#undef __aarch64_vdupq_lane_u64 +#undef __aarch64_vdupq_laneq_f32 +#undef __aarch64_vdupq_laneq_f64 +#undef __aarch64_vdupq_laneq_p8 +#undef __aarch64_vdupq_laneq_p16 +#undef __aarch64_vdupq_laneq_s8 +#undef __aarch64_vdupq_laneq_s16 +#undef __aarch64_vdupq_laneq_s32 +#undef __aarch64_vdupq_laneq_s64 +#undef __aarch64_vdupq_laneq_u8 +#undef __aarch64_vdupq_laneq_u16 +#undef __aarch64_vdupq_laneq_u32 +#undef __aarch64_vdupq_laneq_u64 + +#endif diff --git a/gcc-4.9/gcc/config/aarch64/atomics.md b/gcc-4.9/gcc/config/aarch64/atomics.md new file mode 100644 index 000000000..bffa465de --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/atomics.md @@ -0,0 +1,382 @@ +;; Machine description for AArch64 processor synchronization primitives. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspecv"
+ [
+    UNSPECV_LX			; Represent a load-exclusive.
+    UNSPECV_SX			; Represent a store-exclusive.
+    UNSPECV_LDA			; Represent an atomic load or load-acquire.
+    UNSPECV_STL			; Represent an atomic store or store-release.
+    UNSPECV_ATOMIC_CMPSW	; Represent an atomic compare swap.
+    UNSPECV_ATOMIC_EXCHG	; Represent an atomic exchange.
+    UNSPECV_ATOMIC_OP		; Represent an atomic operation.
+])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand" "")			;; bool out
+   (match_operand:ALLI 1 "register_operand" "")			;; val out
+   (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory
+   (match_operand:ALLI 3 "general_operand" "")			;; expected
+   (match_operand:ALLI 4 "register_operand" "")			;; desired
+   (match_operand:SI 5 "const_int_operand")			;; is_weak
+   (match_operand:SI 6 "const_int_operand")			;; mod_s
+   (match_operand:SI 7 "const_int_operand")]			;; mod_f
+  ""
+  {
+    aarch64_expand_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+    (zero_extend:SI
+      (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+       (match_operand:SHORT 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))	;; memory
+   (set (match_dup 1)
+    (unspec_volatile:GPI
+      [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+       (match_operand:GPI 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_exchange<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")		;; output
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))		;; memory
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")	;; input
+       (match_operand:SI 3 "const_int_operand" "")]	;; model
+      UNSPECV_ATOMIC_EXCHG))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (SET, operands[0], NULL, operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab><mode>"
+ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+   (unspec_volatile:ALLI
+    [(atomic_op:ALLI (match_dup 0)
+      (match_operand:ALLI 1 "<atomic_op_operand>" "rn"))
+     (match_operand:SI 2 "const_int_operand")]		;; model
+    UNSPECV_ATOMIC_OP))
+  (clobber (reg:CC CC_REGNUM))
+  (clobber (match_scratch:ALLI 3 "=&r"))
+  (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand<mode>"
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	 (and:ALLI (match_dup 0)
+	   (match_operand:ALLI 1 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(atomic_op:ALLI (match_dup 1)
+	(match_operand:ALLI 2 "<atomic_op_operand>" "rn"))
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, operands[0], operands[4], operands[1],
+			     operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_nand<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	 (and:ALLI (match_dup 1)
+	   (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, operands[0], operands[4], operands[1],
+			     operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+      (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+      (match_operand:ALLI 2 "<atomic_op_operand>" "rn")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[0], operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (not:ALLI
+      (and:ALLI
+	(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+	(match_operand:ALLI 2 "aarch64_logical_operand" "rn"))))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, NULL, operands[0], operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn "atomic_load<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_LDA))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldr<atomic_sfx>\t%<w>0, %1";
+    else
+      return "ldar<atomic_sfx>\t%<w>0, %1";
+  }
+)
+
+(define_insn "atomic_store<mode>"
+  [(set (match_operand:ALLI 0 "memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "general_operand" "rZ")
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_STL))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "str<atomic_sfx>\t%<w>1, %0";
+    else
+      return "stlr<atomic_sfx>\t%<w>1, %0";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (zero_extend:SI
+      (unspec_volatile:SHORT
+	[(match_operand:SHORT 1 "aarch64_sync_memory_operand" "Q")
+	 (match_operand:SI 2 "const_int_operand")]
+	UNSPECV_LX)))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr<atomic_sfx>\t%w0, %1";
+    else
+      return "ldaxr<atomic_sfx>\t%w0, %1";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+    (unspec_volatile:GPI
+      [(match_operand:GPI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]
+      UNSPECV_LX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr\t%<w>0, %1";
+    else
+      return "ldaxr\t%<w>0, %1";
+  }
+)
+
+(define_insn "aarch64_store_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+   (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_SX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[3]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "stxr<atomic_sfx>\t%w0, %<w>2, %1";
+    else
+      return "stlxr<atomic_sfx>\t%w0, %<w>2, %1";
+  }
+)
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand" "")]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[0]);
+    if (model != MEMMODEL_RELAXED && model != MEMMODEL_CONSUME)
+      emit_insn (gen_dmb (operands[0]));
+    DONE;
+  }
+)
+
+(define_expand "dmb"
+  [(set (match_dup 1)
+    (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+     UNSPEC_MB))]
+   ""
+   {
+    operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+    MEM_VOLATILE_P (operands[1]) = 1;
+   }
+)
+
+(define_insn "*dmb"
+  [(set (match_operand:BLK 0 "" "")
+    (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+     UNSPEC_MB))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[1]);
+    if (model == MEMMODEL_ACQUIRE)
+      return "dmb\\tishld";
+    else
+      return "dmb\\tish";
+  }
+)
diff --git
a/gcc-4.9/gcc/config/aarch64/biarchilp32.h b/gcc-4.9/gcc/config/aarch64/biarchilp32.h new file mode 100644 index 000000000..579673ced --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/biarchilp32.h @@ -0,0 +1,29 @@ +/* Make configure files to produce biarch compiler defaulting to ilp32 ABI. + This file must be included very first, while the OS specific file later + to overwrite otherwise wrong defaults. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define AARCH64_ABI_DEFAULT AARCH64_ABI_ILP32 +#define TARGET_DATA_MODEL 2 diff --git a/gcc-4.9/gcc/config/aarch64/biarchlp64.h b/gcc-4.9/gcc/config/aarch64/biarchlp64.h new file mode 100644 index 000000000..03dd35508 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/biarchlp64.h @@ -0,0 +1,29 @@ +/* Make configure files to produce biarch compiler defaulting to ilp64 ABI. + This file must be included very first, while the OS specific file later + to overwrite otherwise wrong defaults. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define AARCH64_ABI_DEFAULT AARCH64_ABI_LP64 +#define TARGET_DATA_MODEL 1 diff --git a/gcc-4.9/gcc/config/aarch64/constraints.md b/gcc-4.9/gcc/config/aarch64/constraints.md new file mode 100644 index 000000000..12ab570c0 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/constraints.md @@ -0,0 +1,188 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +(define_register_constraint "w" "FP_REGS" + "Floating point and SIMD vector registers.") + +(define_register_constraint "x" "FP_LO_REGS" + "Floating point and SIMD vector registers V0 - V15.") + +(define_constraint "I" + "A constant that can be used with an ADD operation." + (and (match_code "const_int") + (match_test "aarch64_uimm12_shift (ival)"))) + +(define_constraint "J" + "A constant that can be used with a SUB operation (once negated)." + (and (match_code "const_int") + (match_test "aarch64_uimm12_shift (-ival)"))) + +;; We can't use the mode of a CONST_INT to determine the context in +;; which it is being used, so we must have a separate constraint for +;; each context. + +(define_constraint "K" + "A constant that can be used with a 32-bit logical operation." + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (ival, SImode)"))) + +(define_constraint "L" + "A constant that can be used with a 64-bit logical operation." + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (ival, DImode)"))) + +(define_constraint "M" + "A constant that can be used with a 32-bit MOV immediate operation." + (and (match_code "const_int") + (match_test "aarch64_move_imm (ival, SImode)"))) + +(define_constraint "N" + "A constant that can be used with a 64-bit MOV immediate operation." + (and (match_code "const_int") + (match_test "aarch64_move_imm (ival, DImode)"))) + +(define_constraint "S" + "A constraint that matches an absolute symbolic address." + (and (match_code "const,symbol_ref,label_ref") + (match_test "aarch64_symbolic_address_p (op)"))) + +(define_constraint "Y" + "Floating point constant zero." + (and (match_code "const_double") + (match_test "aarch64_float_const_zero_rtx_p (op)"))) + +(define_constraint "Z" + "Integer constant zero." + (match_test "op == const0_rtx")) + +(define_constraint "Ush" + "A constraint that matches an absolute symbolic address high part." + (and (match_code "high") + (match_test "aarch64_valid_symref (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) + +(define_constraint "Uss" + "@internal + A constraint that matches an immediate shift constant in SImode." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival < 32"))) + +(define_constraint "Usd" + "@internal + A constraint that matches an immediate shift constant in DImode." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) + +(define_constraint "UsM" + "@internal + A constraint that matches the immediate constant -1." + (match_test "op == constm1_rtx")) + +(define_constraint "Ui1" + "@internal + A constraint that matches the immediate constant +1." + (match_test "op == const1_rtx")) + +(define_constraint "Ui3" + "@internal + A constraint that matches the integers 0...4." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival <= 4"))) + +(define_constraint "Up3" + "@internal + A constraint that matches the integers 2^(0...4)." 
+ (and (match_code "const_int") + (match_test "(unsigned) exact_log2 (ival) <= 4"))) + +(define_memory_constraint "Q" + "A memory address which uses a single base register with no offset." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + +(define_memory_constraint "Ump" + "@internal + A memory address suitable for a load/store pair operation." + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + PARALLEL, 1)"))) + +(define_memory_constraint "Utv" + "@internal + An address valid for loading/storing opaque structure + types wider than TImode." + (and (match_code "mem") + (match_test "aarch64_simd_mem_operand_p (op)"))) + +(define_constraint "Ufc" + "A floating point constant which can be used with an\ + FMOV immediate operation." + (and (match_code "const_double") + (match_test "aarch64_float_const_representable_p (op)"))) + +(define_constraint "Dn" + "@internal + A constraint that matches vector of immediates." + (and (match_code "const_vector") + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) + +(define_constraint "Dh" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in HImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + HImode)"))) + +(define_constraint "Dq" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in QImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + QImode)"))) + +(define_constraint "Dl" + "@internal + A constraint that matches vector of immediates for left shifts." + (and (match_code "const_vector") + (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op), + true)"))) + +(define_constraint "Dr" + "@internal + A constraint that matches vector of immediates for right shifts." + (and (match_code "const_vector") + (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op), + false)"))) +(define_constraint "Dz" + "@internal + A constraint that matches vector of immediate zero." + (and (match_code "const_vector") + (match_test "aarch64_simd_imm_zero_p (op, GET_MODE (op))"))) + +(define_constraint "Dd" + "@internal + A constraint that matches an immediate operand valid for AdvSIMD scalar." + (and (match_code "const_int") + (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))"))) diff --git a/gcc-4.9/gcc/config/aarch64/gentune.sh b/gcc-4.9/gcc/config/aarch64/gentune.sh new file mode 100644 index 000000000..c0f2e794f --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/gentune.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
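As an aside on the atomics.md patterns earlier in this patch (not part of the checked-in sources): the atomic_load, atomic_store and exclusive load/store insns select between the plain and the acquire/release forms (LDR vs. LDAR, STR vs. STLR, LDXR/STXR vs. LDAXR/STLXR) purely from the memory-model operand, and mem_thread_fence emits a DMB only for models stronger than relaxed/consume. A minimal C sketch of what that means for GCC's __atomic builtins follows; the function names are illustrative only, and the instruction choices in the comments are assumptions read off those patterns, not verified compiler output.

long
load_relaxed (long *p)
{
  return __atomic_load_n (p, __ATOMIC_RELAXED);   /* expected: plain ldr */
}

long
load_acquire (long *p)
{
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);   /* expected: ldar */
}

void
store_release (long *p, long v)
{
  __atomic_store_n (p, v, __ATOMIC_RELEASE);      /* expected: stlr */
}

/* Read-modify-write operations have no single-instruction form in these
   patterns; aarch64_split_atomic_op expands them after reload into an
   exclusive-load / exclusive-store retry loop (ldxr or ldaxr, then stxr
   or stlxr, depending on the requested model).  */
long
fetch_add (long *p, long v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}

Note also that the load patterns group MEMMODEL_RELEASE with relaxed and the store patterns group MEMMODEL_ACQUIRE with relaxed, since those orderings place no constraint on that direction of access.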
+ +# Generate aarch64-tune.md, a file containing the tune attribute from the list of +# CPUs in aarch64-cores.def + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically by gentune.sh from aarch64-cores.def" + +allcores=`awk -F'[(, ]+' '/^AARCH64_CORE/ { cores = cores$3"," } END { print cores } ' $1` + +echo "(define_attr \"tune\"" +echo " \"$allcores\"" | sed -e 's/,"$/"/' +echo " (const (symbol_ref \"((enum attr_tune) aarch64_tune)\")))" diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md new file mode 100644 index 000000000..f1339b8cc --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/iterators.md @@ -0,0 +1,997 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------- +;; Mode Iterators +;; ------------------------------------------------------------------- + + +;; Iterator for General Purpose Integer registers (32- and 64-bit modes) +(define_mode_iterator GPI [SI DI]) + +;; Iterator for QI and HI modes +(define_mode_iterator SHORT [QI HI]) + +;; Iterator for all integer modes (up to 64-bit) +(define_mode_iterator ALLI [QI HI SI DI]) + +;; Iterator scalar modes (up to 64-bit) +(define_mode_iterator SDQ_I [QI HI SI DI]) + +;; Iterator for all integer modes that can be extended (up to 64-bit) +(define_mode_iterator ALLX [QI HI SI]) + +;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes) +(define_mode_iterator GPF [SF DF]) + +;; Integer vector modes. +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; Integer vector modes. +(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; vector and scalar, 64 & 128-bit container, all integer modes +(define_mode_iterator VSDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI QI HI SI DI]) + +;; vector and scalar, 64 & 128-bit container: all vector integer modes; +;; 64-bit scalar integer mode +(define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) + +;; Double vector modes. +(define_mode_iterator VD [V8QI V4HI V2SI V2SF]) + +;; vector, 64-bit container, all integer modes +(define_mode_iterator VD_BHSI [V8QI V4HI V2SI]) + +;; 128 and 64-bit container; 8, 16, 32-bit vector integer modes +(define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Quad vector modes. +(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; All vector modes, except double. +(define_mode_iterator VQ_S [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Vector and scalar, 64 & 128-bit container: all vector integer mode; +;; 8, 16, 32-bit scalar integer modes +(define_mode_iterator VSDQ_I_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI QI HI SI]) + +;; Vector modes for moves. 
+(define_mode_iterator VDQM [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; addresses in different modes. In LP64, only DI will match, while in +;; ILP32, either can match. +(define_mode_iterator P [(SI "ptr_mode == SImode || Pmode == SImode") + (DI "ptr_mode == DImode || Pmode == DImode")]) + +;; This mode iterator allows :PTR to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) + +;; Vector Float modes. +(define_mode_iterator VDQF [V2SF V4SF V2DF]) + +;; Vector single Float modes. +(define_mode_iterator VDQSF [V2SF V4SF]) + +;; Modes suitable to use as the return type of a vcond expression. +(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) + +;; All Float modes. +(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF]) + +;; Vector Float modes with 2 elements. +(define_mode_iterator V2F [V2SF V2DF]) + +;; All modes. +(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF]) + +;; All vector modes and DI. +(define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI]) + +;; All vector modes and DI and DF. +(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI + V2DI V2SF V4SF V2DF DI DF]) + +;; Vector modes for Integer reduction across lanes. +(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI]) + +;; Vector modes(except V2DI) for Integer reduction across lanes. +(define_mode_iterator VDQV_S [V8QI V16QI V4HI V8HI V4SI]) + +;; All double integer narrow-able modes. +(define_mode_iterator VDN [V4HI V2SI DI]) + +;; All quad integer narrow-able modes. +(define_mode_iterator VQN [V8HI V4SI V2DI]) + +;; All double integer widen-able modes. +(define_mode_iterator VDW [V8QI V4HI V2SI]) + +;; Vector and scalar 128-bit container: narrowable 16, 32, 64-bit integer modes +(define_mode_iterator VSQN_HSDI [V8HI V4SI V2DI HI SI DI]) + +;; All quad integer widen-able modes. +(define_mode_iterator VQW [V16QI V8HI V4SI]) + +;; Double vector modes for combines. +(define_mode_iterator VDC [V8QI V4HI V2SI V2SF DI DF]) + +;; Double vector modes for combines. +(define_mode_iterator VDIC [V8QI V4HI V2SI]) + +;; Double vector modes. +(define_mode_iterator VD_RE [V8QI V4HI V2SI DI DF V2SF]) + +;; Vector modes except double int. +(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) + +;; Vector modes for Q and H types. +(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) + +;; Vector modes for H and S types. +(define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) + +;; Vector modes for Q, H and S types. +(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Vector and scalar integer modes for H and S +(define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI]) + +;; Vector and scalar 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VSD_HSI [V4HI V2SI HI SI]) + +;; Vector 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VD_HSI [V4HI V2SI]) + +;; Scalar 64-bit container: 16, 32-bit integer modes +(define_mode_iterator SD_HSI [HI SI]) + +;; Vector 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VQ_HSI [V8HI V4SI]) + +;; All byte modes. +(define_mode_iterator VB [V8QI V16QI]) + +(define_mode_iterator TX [TI TF]) + +;; Opaque structure modes. 
+(define_mode_iterator VSTRUCT [OI CI XI]) + +;; Double scalar modes +(define_mode_iterator DX [DI DF]) + +;; Modes available for mul lane operations. +(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) + +;; Modes available for mul lane operations changing lane count. +(define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) + +;; ------------------------------------------------------------------ +;; Unspec enumerations for Advance SIMD. These could well go into +;; aarch64.md but for their use in int_iterators here. +;; ------------------------------------------------------------------ + +(define_c_enum "unspec" + [ + UNSPEC_ASHIFT_SIGNED ; Used in aarch-simd.md. + UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md. + UNSPEC_FMAX ; Used in aarch64-simd.md. + UNSPEC_FMAXNMV ; Used in aarch64-simd.md. + UNSPEC_FMAXV ; Used in aarch64-simd.md. + UNSPEC_FMIN ; Used in aarch64-simd.md. + UNSPEC_FMINNMV ; Used in aarch64-simd.md. + UNSPEC_FMINV ; Used in aarch64-simd.md. + UNSPEC_FADDV ; Used in aarch64-simd.md. + UNSPEC_SADDV ; Used in aarch64-simd.md. + UNSPEC_UADDV ; Used in aarch64-simd.md. + UNSPEC_SMAXV ; Used in aarch64-simd.md. + UNSPEC_SMINV ; Used in aarch64-simd.md. + UNSPEC_UMAXV ; Used in aarch64-simd.md. + UNSPEC_UMINV ; Used in aarch64-simd.md. + UNSPEC_SHADD ; Used in aarch64-simd.md. + UNSPEC_UHADD ; Used in aarch64-simd.md. + UNSPEC_SRHADD ; Used in aarch64-simd.md. + UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_SHSUB ; Used in aarch64-simd.md. + UNSPEC_UHSUB ; Used in aarch64-simd.md. + UNSPEC_SRHSUB ; Used in aarch64-simd.md. + UNSPEC_URHSUB ; Used in aarch64-simd.md. + UNSPEC_ADDHN ; Used in aarch64-simd.md. + UNSPEC_RADDHN ; Used in aarch64-simd.md. + UNSPEC_SUBHN ; Used in aarch64-simd.md. + UNSPEC_RSUBHN ; Used in aarch64-simd.md. + UNSPEC_ADDHN2 ; Used in aarch64-simd.md. + UNSPEC_RADDHN2 ; Used in aarch64-simd.md. + UNSPEC_SUBHN2 ; Used in aarch64-simd.md. + UNSPEC_RSUBHN2 ; Used in aarch64-simd.md. + UNSPEC_SQDMULH ; Used in aarch64-simd.md. + UNSPEC_SQRDMULH ; Used in aarch64-simd.md. + UNSPEC_PMUL ; Used in aarch64-simd.md. + UNSPEC_USQADD ; Used in aarch64-simd.md. + UNSPEC_SUQADD ; Used in aarch64-simd.md. + UNSPEC_SQXTUN ; Used in aarch64-simd.md. + UNSPEC_SQXTN ; Used in aarch64-simd.md. + UNSPEC_UQXTN ; Used in aarch64-simd.md. + UNSPEC_SSRA ; Used in aarch64-simd.md. + UNSPEC_USRA ; Used in aarch64-simd.md. + UNSPEC_SRSRA ; Used in aarch64-simd.md. + UNSPEC_URSRA ; Used in aarch64-simd.md. + UNSPEC_SRSHR ; Used in aarch64-simd.md. + UNSPEC_URSHR ; Used in aarch64-simd.md. + UNSPEC_SQSHLU ; Used in aarch64-simd.md. + UNSPEC_SQSHL ; Used in aarch64-simd.md. + UNSPEC_UQSHL ; Used in aarch64-simd.md. + UNSPEC_SQSHRUN ; Used in aarch64-simd.md. + UNSPEC_SQRSHRUN ; Used in aarch64-simd.md. + UNSPEC_SQSHRN ; Used in aarch64-simd.md. + UNSPEC_UQSHRN ; Used in aarch64-simd.md. + UNSPEC_SQRSHRN ; Used in aarch64-simd.md. + UNSPEC_UQRSHRN ; Used in aarch64-simd.md. + UNSPEC_SSHL ; Used in aarch64-simd.md. + UNSPEC_USHL ; Used in aarch64-simd.md. + UNSPEC_SRSHL ; Used in aarch64-simd.md. + UNSPEC_URSHL ; Used in aarch64-simd.md. + UNSPEC_SQRSHL ; Used in aarch64-simd.md. + UNSPEC_UQRSHL ; Used in aarch64-simd.md. + UNSPEC_SSLI ; Used in aarch64-simd.md. + UNSPEC_USLI ; Used in aarch64-simd.md. + UNSPEC_SSRI ; Used in aarch64-simd.md. + UNSPEC_USRI ; Used in aarch64-simd.md. + UNSPEC_SSHLL ; Used in aarch64-simd.md. + UNSPEC_USHLL ; Used in aarch64-simd.md. + UNSPEC_ADDP ; Used in aarch64-simd.md. 
+ UNSPEC_TBL ; Used in vector permute patterns. + UNSPEC_CONCAT ; Used in vector permute patterns. + UNSPEC_ZIP1 ; Used in vector permute patterns. + UNSPEC_ZIP2 ; Used in vector permute patterns. + UNSPEC_UZP1 ; Used in vector permute patterns. + UNSPEC_UZP2 ; Used in vector permute patterns. + UNSPEC_TRN1 ; Used in vector permute patterns. + UNSPEC_TRN2 ; Used in vector permute patterns. + UNSPEC_AESE ; Used in aarch64-simd.md. + UNSPEC_AESD ; Used in aarch64-simd.md. + UNSPEC_AESMC ; Used in aarch64-simd.md. + UNSPEC_AESIMC ; Used in aarch64-simd.md. + UNSPEC_SHA1C ; Used in aarch64-simd.md. + UNSPEC_SHA1M ; Used in aarch64-simd.md. + UNSPEC_SHA1P ; Used in aarch64-simd.md. + UNSPEC_SHA1H ; Used in aarch64-simd.md. + UNSPEC_SHA1SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA1SU1 ; Used in aarch64-simd.md. + UNSPEC_SHA256H ; Used in aarch64-simd.md. + UNSPEC_SHA256H2 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU1 ; Used in aarch64-simd.md. + UNSPEC_PMULL ; Used in aarch64-simd.md. + UNSPEC_PMULL2 ; Used in aarch64-simd.md. +]) + +;; ------------------------------------------------------------------- +;; Mode attributes +;; ------------------------------------------------------------------- + +;; In GPI templates, a string like "%0" will expand to "%w0" in the +;; 32-bit version and "%x0" in the 64-bit version. +(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) + +;; For constraints used in scalar immediate vector moves +(define_mode_attr hq [(HI "h") (QI "q")]) + +;; For scalar usage of vector/FP registers +(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d") + (SF "s") (DF "d") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; For scalar usage of vector/FP registers, narrowing +(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; For scalar usage of vector/FP registers, widening +(define_mode_attr vw2 [(DI "") (QI "h") (HI "s") (SI "d") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; Register Type Name and Vector Arrangement Specifier for when +;; we are doing scalar for DI and SIMD for SI (ignoring all but +;; lane 0). +(define_mode_attr rtn [(DI "d") (SI "")]) +(define_mode_attr vas [(DI "") (SI ".2s")]) + +;; Map a floating point mode to the appropriate register name prefix +(define_mode_attr s [(SF "s") (DF "d")]) + +;; Give the length suffix letter for a sign- or zero-extension. +(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) + +;; Give the number of bits in the mode +(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")]) + +;; Give the ordinal of the MSB in the mode +(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")]) + +;; Attribute to describe constants acceptable in logical operations +(define_mode_attr lconst [(SI "K") (DI "L")]) + +;; Map a mode to a specific constraint character. 
+(define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) + +(define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") + (V4HI "4h") (V8HI "8h") + (V2SI "2s") (V4SI "4s") + (DI "1d") (DF "1d") + (V2DI "2d") (V2SF "2s") + (V4SF "4s") (V2DF "2d")]) + +(define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") + (V4HI ".4h") (V8HI ".8h") + (V2SI ".2s") (V4SI ".4s") + (V2DI ".2d") (V2SF ".2s") + (V4SF ".4s") (V2DF ".2d") + (DI "") (SI "") + (HI "") (QI "") + (TI "") (SF "") + (DF "")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h") + (V2DI ".2s") + (DI "") (SI "") + (HI "")]) + +;; Mode-to-individual element type mapping. +(define_mode_attr Vetype [(V8QI "b") (V16QI "b") + (V4HI "h") (V8HI "h") + (V2SI "s") (V4SI "s") + (V2DI "d") (V2SF "s") + (V4SF "s") (V2DF "d") + (SF "s") (DF "d") + (QI "b") (HI "h") + (SI "s") (DI "d")]) + +;; Mode-to-bitwise operation type mapping. +(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b") + (V4HI "8b") (V8HI "16b") + (V2SI "8b") (V4SI "16b") + (V2DI "16b") (V2SF "8b") + (V4SF "16b") (V2DF "16b") + (DI "8b") (DF "8b")]) + +;; Define element mode for each vector mode. +(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (DI "DI") (V2DI "DI") + (V2SF "SF") (V4SF "SF") + (V2DF "DF") (DF "DF") + (SI "SI") (HI "HI") + (QI "QI")]) + +;; Define container mode for lane selection. +(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI") + (V2SI "V2SI") (V4SI "V2SI") + (DI "DI") (V2DI "DI") + (V2SF "V2SF") (V4SF "V2SF") + (V2DF "DF")]) + +;; Define container mode for lane selection. +(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") + (V4HI "V8HI") (V8HI "V8HI") + (V2SI "V4SI") (V4SI "V4SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V2SF") (V4SF "V4SF") + (V2DF "V2DF") (SI "V4SI") + (HI "V8HI") (QI "V16QI")]) + +;; Define container mode for lane selection. +(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI") + (V4HI "V8HI") (V8HI "V8HI") + (V2SI "V4SI") (V4SI "V4SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V4SF") (V4SF "V4SF") + (V2DF "V2DF") (SI "V4SI") + (HI "V8HI") (QI "V16QI")]) + +;; Half modes of all vector modes. +(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") + (V4HI "V2HI") (V8HI "V4HI") + (V2SI "SI") (V4SI "V2SI") + (V2DI "DI") (V2SF "SF") + (V4SF "V2SF") (V2DF "DF")]) + +;; Double modes of vector modes. +(define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") + (V2SI "V4SI") (V2SF "V4SF") + (SI "V2SI") (DI "V2DI") + (DF "V2DF")]) + +;; Double modes of vector modes (lower case). +(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") + (V2SI "v4si") (V2SF "v4sf") + (SI "v2si") (DI "v2di") + (DF "v2df")]) + +;; Narrowed modes for VDN. +(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") + (DI "V2SI")]) + +;; Narrowed double-modes for VQN (Used for XTN). +(define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI") + (V2DI "V2SI") + (DI "SI") (SI "HI") + (HI "QI")]) + +;; Narrowed quad-modes for VQN (Used for XTN2). +(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") + (V2DI "V4SI")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") + (V2DI "2s")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h") + (V2DI "4s")]) + +;; Widened modes of vector modes. +(define_mode_attr VWIDE [(V8QI "V8HI") (V4HI "V4SI") + (V2SI "V2DI") (V16QI "V8HI") + (V8HI "V4SI") (V4SI "V2DI") + (HI "SI") (SI "DI")] + +) + +;; Widened mode register suffixes for VDW/VQW. 
+(define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") + (V2SI "2d") (V16QI "8h") + (V8HI "4s") (V4SI "2d")]) + +;; Widened mode register suffixes for VDW/VQW. +(define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s") + (V2SI ".2d") (V16QI ".8h") + (V8HI ".4s") (V4SI ".2d") + (SI "") (HI "")]) + +;; Lower part register suffixes for VQW. +(define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h") + (V4SI "2s")]) + +;; Define corresponding core/FP element mode for each vector mode. +(define_mode_attr vw [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "s") (V4SF "s") + (V2DF "d")]) + +;; Corresponding core element mode for each vector mode. This is a +;; variation on mapping FP modes to GP regs. +(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "w") (V4SF "w") + (V2DF "x")]) + +;; Double vector types for ALLX. +(define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) + +;; Mode of result of comparison operations. +(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") + (V4HI "V4HI") (V8HI "V8HI") + (V2SI "V2SI") (V4SI "V4SI") + (DI "DI") (V2DI "V2DI") + (V2SF "V2SI") (V4SF "V4SI") + (V2DF "V2DI") (DF "DI") + (SF "SI")]) + +;; Lower case mode of results of comparison operations. +(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") + (V4HI "v4hi") (V8HI "v8hi") + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V2SF "v2si") (V4SF "v4si") + (V2DF "v2di") (DF "di") + (SF "si")]) + +;; Vm for lane instructions is restricted to FP_LO_REGS. +(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x") + (V2SI "w") (V4SI "w") (SI "w")]) + +(define_mode_attr Vendreg [(OI "T") (CI "U") (XI "V")]) + +(define_mode_attr nregs [(OI "2") (CI "3") (XI "4")]) + +(define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI") + (V2SI "V8SI") (V2SF "V8SF") + (DI "V4DI") (DF "V4DF") + (V16QI "V32QI") (V8HI "V16HI") + (V4SI "V8SI") (V4SF "V8SF") + (V2DI "V4DI") (V2DF "V4DF")]) + +(define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI") + (V2SI "V12SI") (V2SF "V12SF") + (DI "V6DI") (DF "V6DF") + (V16QI "V48QI") (V8HI "V24HI") + (V4SI "V12SI") (V4SF "V12SF") + (V2DI "V6DI") (V2DF "V6DF")]) + +(define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI") + (V2SI "V16SI") (V2SF "V16SF") + (DI "V8DI") (DF "V8DF") + (V16QI "V64QI") (V8HI "V32HI") + (V4SI "V16SI") (V4SF "V16SF") + (V2DI "V8DI") (V2DF "V8DF")]) + +(define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + +;; Mode for atomic operation suffixes +(define_mode_attr atomic_sfx + [(QI "b") (HI "h") (SI "") (DI "")]) + +(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) +(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) + +(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") + (V4HI "V8HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "DI") + (V2SF "V4SF") (V4SF "V2SF") + (DF "V2DF") (V2DF "DF")]) + +(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64") + (V4HI "to_128") (V8HI "to_64") + (V2SI "to_128") (V4SI "to_64") + (DI "to_128") (V2DI "to_64") + (V2SF "to_128") (V4SF "to_64") + (DF "to_128") (V2DF "to_64")]) + +;; For certain vector-by-element multiplication instructions we must +;; constrain the HI cases to use only V0-V15. This is covered by +;; the 'x' constraint. All other modes may use the 'w' constraint. 
+(define_mode_attr h_con [(V2SI "w") (V4SI "w") + (V4HI "x") (V8HI "x") + (V2SF "w") (V4SF "w") + (V2DF "w") (DF "w")]) + +;; Defined to 'f' for types whose element type is a float type. +(define_mode_attr f [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (DI "") (V2DI "") + (V2SF "f") (V4SF "f") + (V2DF "f") (DF "f")]) + +;; Defined to '_fp' for types whose element type is a float type. +(define_mode_attr fp [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (DI "") (V2DI "") + (V2SF "_fp") (V4SF "_fp") + (V2DF "_fp") (DF "_fp") + (SF "_fp")]) + +;; Defined to '_q' for 128-bit types. +(define_mode_attr q [(V8QI "") (V16QI "_q") + (V4HI "") (V8HI "_q") + (V2SI "") (V4SI "_q") + (DI "") (V2DI "_q") + (V2SF "") (V4SF "_q") + (V2DF "_q") + (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")]) + +(define_mode_attr vp [(V8QI "v") (V16QI "v") + (V4HI "v") (V8HI "v") + (V2SI "p") (V4SI "v") + (V2DI "p") (V2DF "p") + (V2SF "p") (V4SF "v")]) + +;; ------------------------------------------------------------------- +;; Code Iterators +;; ------------------------------------------------------------------- + +;; This code iterator allows the various shifts supported on the core +(define_code_iterator SHIFT [ashift ashiftrt lshiftrt rotatert]) + +;; This code iterator allows the shifts supported in arithmetic instructions +(define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) + +;; Code iterator for logical operations +(define_code_iterator LOGICAL [and ior xor]) + +;; Code iterator for sign/zero extension +(define_code_iterator ANY_EXTEND [sign_extend zero_extend]) + +;; All division operations (signed/unsigned) +(define_code_iterator ANY_DIV [div udiv]) + +;; Code iterator for sign/zero extraction +(define_code_iterator ANY_EXTRACT [sign_extract zero_extract]) + +;; Code iterator for equality comparisons +(define_code_iterator EQL [eq ne]) + +;; Code iterator for less-than and greater/equal-to +(define_code_iterator LTGE [lt ge]) + +;; Iterator for __sync_ operations that where the operation can be +;; represented directly RTL. This is all of the sync operations bar +;; nand. +(define_code_iterator atomic_op [plus minus ior xor and]) + +;; Iterator for integer conversions +(define_code_iterator FIXUORS [fix unsigned_fix]) + +;; Iterator for float conversions +(define_code_iterator FLOATUORS [float unsigned_float]) + +;; Code iterator for variants of vector max and min. +(define_code_iterator MAXMIN [smax smin umax umin]) + +(define_code_iterator FMAXMIN [smax smin]) + +;; Code iterator for variants of vector max and min. +(define_code_iterator ADDSUB [plus minus]) + +;; Code iterator for variants of vector saturating binary ops. +(define_code_iterator BINQOPS [ss_plus us_plus ss_minus us_minus]) + +;; Code iterator for variants of vector saturating unary ops. +(define_code_iterator UNQOPS [ss_neg ss_abs]) + +;; Code iterator for signed variants of vector saturating binary ops. +(define_code_iterator SBINQOPS [ss_plus ss_minus]) + +;; Comparison operators for CM. +(define_code_iterator COMPARISONS [lt le eq ge gt]) + +;; Unsigned comparison operators. +(define_code_iterator UCOMPARISONS [ltu leu geu gtu]) + +;; Unsigned comparison operators. 
+(define_code_iterator FAC_COMPARISONS [lt le ge gt]) + +;; ------------------------------------------------------------------- +;; Code Attributes +;; ------------------------------------------------------------------- +;; Map rtl objects to optab names +(define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") + (rotatert "rotr") + (sign_extend "extend") + (zero_extend "zero_extend") + (sign_extract "extv") + (zero_extract "extzv") + (fix "fix") + (unsigned_fix "fixuns") + (float "float") + (unsigned_float "floatuns") + (and "and") + (ior "ior") + (xor "xor") + (not "one_cmpl") + (neg "neg") + (plus "add") + (minus "sub") + (ss_plus "qadd") + (us_plus "qadd") + (ss_minus "qsub") + (us_minus "qsub") + (ss_neg "qneg") + (ss_abs "qabs") + (eq "eq") + (ne "ne") + (lt "lt") + (ge "ge") + (le "le") + (gt "gt") + (ltu "ltu") + (leu "leu") + (geu "geu") + (gtu "gtu")]) + +;; For comparison operators we use the FCM* and CM* instructions. +;; As there are no CMLE or CMLT instructions which act on 3 vector +;; operands, we must use CMGE or CMGT and swap the order of the +;; source operands. + +(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt") + (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")]) +(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1") + (ltu "2") (leu "2") (geu "1") (gtu "1")]) +(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2") + (ltu "1") (leu "1") (geu "2") (gtu "2")]) + +(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT") + (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")]) + +(define_code_attr fix_trunc_optab [(fix "fix_trunc") + (unsigned_fix "fixuns_trunc")]) + +;; Optab prefix for sign/zero-extending operations +(define_code_attr su_optab [(sign_extend "") (zero_extend "u") + (div "") (udiv "u") + (fix "") (unsigned_fix "u") + (float "s") (unsigned_float "u") + (ss_plus "s") (us_plus "u") + (ss_minus "s") (us_minus "u")]) + +;; Similar for the instruction mnemonics +(define_code_attr shift [(ashift "lsl") (ashiftrt "asr") + (lshiftrt "lsr") (rotatert "ror")]) + +;; Map shift operators onto underlying bit-field instructions +(define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") + (lshiftrt "ubfx") (rotatert "extr")]) + +;; Logical operator instruction mnemonics +(define_code_attr logical [(and "and") (ior "orr") (xor "eor")]) + +;; Similar, but when not(op) +(define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")]) + +;; Sign- or zero-extending load +(define_code_attr ldrxt [(sign_extend "ldrs") (zero_extend "ldr")]) + +;; Sign- or zero-extending data-op +(define_code_attr su [(sign_extend "s") (zero_extend "u") + (sign_extract "s") (zero_extract "u") + (fix "s") (unsigned_fix "u") + (div "s") (udiv "u") + (smax "s") (umax "u") + (smin "s") (umin "u")]) + +;; Emit cbz/cbnz depending on comparison type. +(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")]) + +;; Emit tbz/tbnz depending on comparison type. +(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")]) + +;; Max/min attributes. +(define_code_attr maxmin [(smax "max") + (smin "min") + (umax "max") + (umin "min")]) + +;; MLA/MLS attributes. 
+(define_code_attr as [(ss_plus "a") (ss_minus "s")]) + +;; Atomic operations +(define_code_attr atomic_optab + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + +(define_code_attr atomic_op_operand + [(ior "aarch64_logical_operand") + (xor "aarch64_logical_operand") + (and "aarch64_logical_operand") + (plus "aarch64_plus_operand") + (minus "aarch64_plus_operand")]) + +;; ------------------------------------------------------------------- +;; Int Iterators. +;; ------------------------------------------------------------------- +(define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV + UNSPEC_SMAXV UNSPEC_SMINV]) + +(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV + UNSPEC_FMAXNMV UNSPEC_FMINNMV]) + +(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV]) + +(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD + UNSPEC_SRHADD UNSPEC_URHADD + UNSPEC_SHSUB UNSPEC_UHSUB + UNSPEC_SRHSUB UNSPEC_URHSUB]) + + +(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN + UNSPEC_SUBHN UNSPEC_RSUBHN]) + +(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2 + UNSPEC_SUBHN2 UNSPEC_RSUBHN2]) + +(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN]) + +(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) + +(define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD]) + +(define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN]) + +(define_int_iterator VSHL [UNSPEC_SSHL UNSPEC_USHL + UNSPEC_SRSHL UNSPEC_URSHL]) + +(define_int_iterator VSHLL [UNSPEC_SSHLL UNSPEC_USHLL]) + +(define_int_iterator VQSHL [UNSPEC_SQSHL UNSPEC_UQSHL + UNSPEC_SQRSHL UNSPEC_UQRSHL]) + +(define_int_iterator VSRA [UNSPEC_SSRA UNSPEC_USRA + UNSPEC_SRSRA UNSPEC_URSRA]) + +(define_int_iterator VSLRI [UNSPEC_SSLI UNSPEC_USLI + UNSPEC_SSRI UNSPEC_USRI]) + + +(define_int_iterator VRSHR_N [UNSPEC_SRSHR UNSPEC_URSHR]) + +(define_int_iterator VQSHL_N [UNSPEC_SQSHLU UNSPEC_SQSHL UNSPEC_UQSHL]) + +(define_int_iterator VQSHRN_N [UNSPEC_SQSHRUN UNSPEC_SQRSHRUN + UNSPEC_SQSHRN UNSPEC_UQSHRN + UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) + +(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 + UNSPEC_TRN1 UNSPEC_TRN2 + UNSPEC_UZP1 UNSPEC_UZP2]) + +(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM + UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX + UNSPEC_FRINTA]) + +(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM + UNSPEC_FRINTA UNSPEC_FRINTN]) + +(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) + +(define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) +(define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_SHA1 [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P]) + +(define_int_iterator CRYPTO_SHA256 [UNSPEC_SHA256H UNSPEC_SHA256H2]) + +;; ------------------------------------------------------------------- +;; Int Iterators Attributes. 
+;; ------------------------------------------------------------------- +(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "smax_nan") + (UNSPEC_FMAXNMV "smax") + (UNSPEC_FMAXV "smax_nan") + (UNSPEC_FMIN "smin_nan") + (UNSPEC_FMINNMV "smin") + (UNSPEC_FMINV "smin_nan")]) + +(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "fmax") + (UNSPEC_FMAXNMV "fmaxnm") + (UNSPEC_FMAXV "fmax") + (UNSPEC_FMIN "fmin") + (UNSPEC_FMINNMV "fminnm") + (UNSPEC_FMINV "fmin")]) + +(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") + (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") + (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u") + (UNSPEC_SRHSUB "sr") (UNSPEC_URHSUB "ur") + (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r") + (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r") + (UNSPEC_ADDHN2 "") (UNSPEC_RADDHN2 "r") + (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r") + (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u") + (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su") + (UNSPEC_SADDV "s") (UNSPEC_UADDV "u") + (UNSPEC_SSLI "s") (UNSPEC_USLI "u") + (UNSPEC_SSRI "s") (UNSPEC_USRI "u") + (UNSPEC_USRA "u") (UNSPEC_SSRA "s") + (UNSPEC_URSRA "ur") (UNSPEC_SRSRA "sr") + (UNSPEC_URSHR "ur") (UNSPEC_SRSHR "sr") + (UNSPEC_SQSHLU "s") (UNSPEC_SQSHL "s") + (UNSPEC_UQSHL "u") + (UNSPEC_SQSHRUN "s") (UNSPEC_SQRSHRUN "s") + (UNSPEC_SQSHRN "s") (UNSPEC_UQSHRN "u") + (UNSPEC_SQRSHRN "s") (UNSPEC_UQRSHRN "u") + (UNSPEC_USHL "u") (UNSPEC_SSHL "s") + (UNSPEC_USHLL "u") (UNSPEC_SSHLL "s") + (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") + (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") +]) + +(define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") + (UNSPEC_SQSHRUN "") (UNSPEC_SQRSHRUN "r") + (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") + (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r") + (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") + (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r") +]) + +(define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l") + (UNSPEC_SSRI "r") (UNSPEC_USRI "r")]) + +(define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") + (UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u") + (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") + (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "")]) + +(define_int_attr addsub [(UNSPEC_SHADD "add") + (UNSPEC_UHADD "add") + (UNSPEC_SRHADD "add") + (UNSPEC_URHADD "add") + (UNSPEC_SHSUB "sub") + (UNSPEC_UHSUB "sub") + (UNSPEC_SRHSUB "sub") + (UNSPEC_URHSUB "sub") + (UNSPEC_ADDHN "add") + (UNSPEC_SUBHN "sub") + (UNSPEC_RADDHN "add") + (UNSPEC_RSUBHN "sub") + (UNSPEC_ADDHN2 "add") + (UNSPEC_SUBHN2 "sub") + (UNSPEC_RADDHN2 "add") + (UNSPEC_RSUBHN2 "sub")]) + +(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1") + (UNSPEC_SSRI "0") (UNSPEC_USRI "0")]) + +;; Standard pattern names for floating-point rounding instructions. +(define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc") + (UNSPEC_FRINTP "ceil") + (UNSPEC_FRINTM "floor") + (UNSPEC_FRINTI "nearbyint") + (UNSPEC_FRINTX "rint") + (UNSPEC_FRINTA "round") + (UNSPEC_FRINTN "frintn")]) + +;; frint suffix for floating-point rounding instructions. 
+(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p") + (UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i") + (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a") + (UNSPEC_FRINTN "n")]) + +(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round") + (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor") + (UNSPEC_FRINTN "frintn")]) + +(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") + (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") + (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) + +(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") + (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") + (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) + +(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) + +(define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) +(define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) + +(define_int_attr sha1_op [(UNSPEC_SHA1C "c") (UNSPEC_SHA1P "p") + (UNSPEC_SHA1M "m")]) + +(define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")]) diff --git a/gcc-4.9/gcc/config/aarch64/predicates.md b/gcc-4.9/gcc/config/aarch64/predicates.md new file mode 100644 index 000000000..c8e27d871 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/predicates.md @@ -0,0 +1,302 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
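As an aside on the rounding-instruction attributes in iterators.md above (not part of the checked-in sources): frint_pattern ties each UNSPEC_FRINT* to a standard optab name (btrunc, ceil, floor, round, rint, nearbyint, frintn) and frint_suffix supplies the letter appended to FRINT. A small C illustration follows; the function names are illustrative only, and the expected FRINT* selections are assumptions based on those two tables (actual expansion can additionally depend on floating-point option flags), not verified output.

double do_trunc  (double x) { return __builtin_trunc (x);     }  /* expected: frintz */
double do_ceil   (double x) { return __builtin_ceil (x);      }  /* expected: frintp */
double do_floor  (double x) { return __builtin_floor (x);     }  /* expected: frintm */
double do_round  (double x) { return __builtin_round (x);     }  /* expected: frinta */
double do_rint   (double x) { return __builtin_rint (x);      }  /* expected: frintx */
double do_nearby (double x) { return __builtin_nearbyint (x); }  /* expected: frinti */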
+ +(define_special_predicate "cc_register" + (and (match_code "reg") + (and (match_test "REGNO (op) == CC_REGNUM") + (ior (match_test "mode == GET_MODE (op)") + (match_test "mode == VOIDmode + && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) +) + +(define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + +(define_predicate "aarch64_reg_or_zero" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (match_test "op == const0_rtx")))) + +(define_predicate "aarch64_reg_or_fp_zero" + (and (match_code "reg,subreg,const_double") + (ior (match_operand 0 "register_operand") + (match_test "aarch64_float_const_zero_rtx_p (op)")))) + +(define_predicate "aarch64_reg_zero_or_m1_or_1" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (ior (match_test "op == const0_rtx") + (ior (match_test "op == constm1_rtx") + (match_test "op == const1_rtx")))))) + +(define_predicate "aarch64_fp_compare_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_double") + (match_test "aarch64_float_const_zero_rtx_p (op)")))) + +(define_predicate "aarch64_plus_immediate" + (and (match_code "const_int") + (ior (match_test "aarch64_uimm12_shift (INTVAL (op))") + (match_test "aarch64_uimm12_shift (-INTVAL (op))")))) + +(define_predicate "aarch64_plus_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_plus_immediate"))) + +(define_predicate "aarch64_pluslong_immediate" + (and (match_code "const_int") + (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)"))) + +(define_predicate "aarch64_pluslong_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_pluslong_immediate"))) + +(define_predicate "aarch64_logical_immediate" + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (INTVAL (op), mode)"))) + +(define_predicate "aarch64_logical_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_logical_immediate"))) + +(define_predicate "aarch64_shift_imm_si" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 32"))) + +(define_predicate "aarch64_shift_imm_di" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 64"))) + +(define_predicate "aarch64_shift_imm64_di" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 64"))) + +(define_predicate "aarch64_reg_or_shift_imm_si" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_shift_imm_si"))) + +(define_predicate "aarch64_reg_or_shift_imm_di" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_shift_imm_di"))) + +;; The imm3 field is a 3-bit field that only accepts immediates in the +;; range 0..4. 
+(define_predicate "aarch64_imm3" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 4"))) + +(define_predicate "aarch64_pwr_imm3" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) <= 4"))) + +(define_predicate "aarch64_pwr_2_si" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) < 32"))) + +(define_predicate "aarch64_pwr_2_di" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) < 64"))) + +(define_predicate "aarch64_mem_pair_operand" + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, + 0)"))) + +(define_predicate "aarch64_valid_symref" + (match_code "const, symbol_ref, label_ref") +{ + return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); +}) + +(define_predicate "aarch64_tls_ie_symref" + (match_code "const, symbol_ref, label_ref") +{ + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + op = XEXP (op, 0); + + case SYMBOL_REF: + return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC; + + default: + gcc_unreachable (); + } +}) + +(define_predicate "aarch64_tls_le_symref" + (match_code "const, symbol_ref, label_ref") +{ + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + op = XEXP (op, 0); + + case SYMBOL_REF: + return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC; + + default: + gcc_unreachable (); + } +}) + +(define_predicate "aarch64_mov_operand" + (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high") + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "memory_operand") + (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)"))))) + +(define_predicate "aarch64_movti_operand" + (and (match_code "reg,subreg,mem,const_int") + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "memory_operand") + (match_operand 0 "const_int_operand"))))) + +(define_predicate "aarch64_reg_or_imm" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand")))) + +;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ. +(define_special_predicate "aarch64_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")) + +;; True if the operand is memory reference suitable for a load/store exclusive. +(define_predicate "aarch64_sync_memory_operand" + (and (match_operand 0 "memory_operand") + (match_code "reg" "0"))) + +;; Predicates for parallel expanders based on mode. 
+(define_special_predicate "vect_par_cnst_hi_half" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int nunits = GET_MODE_NUNITS (mode); + int i; + + if (count < 1 + || count != nunits / 2) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != (nunits / 2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_cnst_lo_half" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int nunits = GET_MODE_NUNITS (mode); + int i; + + if (count < 1 + || count != nunits / 2) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) + + +(define_special_predicate "aarch64_simd_lshift_imm" + (match_code "const_vector") +{ + return aarch64_simd_shift_imm_p (op, mode, true); +}) + +(define_special_predicate "aarch64_simd_rshift_imm" + (match_code "const_vector") +{ + return aarch64_simd_shift_imm_p (op, mode, false); +}) + +(define_predicate "aarch64_simd_reg_or_zero" + (and (match_code "reg,subreg,const_int,const_vector") + (ior (match_operand 0 "register_operand") + (ior (match_test "op == const0_rtx") + (match_test "aarch64_simd_imm_zero_p (op, mode)"))))) + +(define_predicate "aarch64_simd_struct_operand" + (and (match_code "mem") + (match_test "TARGET_SIMD && aarch64_simd_mem_operand_p (op)"))) + +;; Like general_operand but allow only valid SIMD addressing modes. +(define_predicate "aarch64_simd_general_operand" + (and (match_operand 0 "general_operand") + (match_test "!MEM_P (op) + || GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG"))) + +;; Like nonimmediate_operand but allow only valid SIMD addressing modes. +(define_predicate "aarch64_simd_nonimmediate_operand" + (and (match_operand 0 "nonimmediate_operand") + (match_test "!MEM_P (op) + || GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG"))) + +(define_special_predicate "aarch64_simd_imm_zero" + (match_code "const_vector") +{ + return aarch64_simd_imm_zero_p (op, mode); +}) diff --git a/gcc-4.9/gcc/config/aarch64/t-aarch64 b/gcc-4.9/gcc/config/aarch64/t-aarch64 new file mode 100644 index 000000000..158fbb578 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/t-aarch64 @@ -0,0 +1,45 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +TM_H += $(srcdir)/config/aarch64/aarch64-cores.def +OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def + +$(srcdir)/config/aarch64/aarch64-tune.md: $(srcdir)/config/aarch64/gentune.sh \ + $(srcdir)/config/aarch64/aarch64-cores.def + $(SHELL) $(srcdir)/config/aarch64/gentune.sh \ + $(srcdir)/config/aarch64/aarch64-cores.def > \ + $(srcdir)/config/aarch64/aarch64-tune.md + +aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \ + $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ + $(srcdir)/config/aarch64/aarch64-simd-builtins.def + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-builtins.c + +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + +comma=, +MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) +MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc-4.9/gcc/config/aarch64/t-aarch64-linux b/gcc-4.9/gcc/config/aarch64/t-aarch64-linux new file mode 100644 index 000000000..147452b04 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/t-aarch64-linux @@ -0,0 +1,31 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = aarch64/lib1funcs.asm +LIB1ASMFUNCS = _aarch64_sync_cache_range + +AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) +MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) +MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) + +# Disable the multilib for linux-gnu targets for the time being; focus +# on the baremetal targets. +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = diff --git a/gcc-4.9/gcc/config/alpha/alpha-modes.def b/gcc-4.9/gcc/config/alpha/alpha-modes.def new file mode 100644 index 000000000..dbfbed0b7 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha-modes.def @@ -0,0 +1,27 @@ +/* Alpha extra machine modes. + Copyright (C) 2003-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point. This gets reset in alpha_option_override + if VAX float format is in use. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ diff --git a/gcc-4.9/gcc/config/alpha/alpha-protos.h b/gcc-4.9/gcc/config/alpha/alpha-protos.h new file mode 100644 index 000000000..753a762a5 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha-protos.h @@ -0,0 +1,117 @@ +/* Prototypes for alpha.c functions used in the md file & elsewhere. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern int alpha_next_sequence_number; + +extern void literal_section (void); +extern int zap_mask (HOST_WIDE_INT); +extern int direct_return (void); + +extern int alpha_sa_size (void); +extern HOST_WIDE_INT alpha_initial_elimination_offset (unsigned int, + unsigned int); +extern void alpha_expand_prologue (void); +extern void alpha_expand_epilogue (void); +extern void alpha_output_filename (FILE *, const char *); + +extern bool alpha_legitimate_constant_p (enum machine_mode, rtx); +extern rtx alpha_legitimize_reload_address (rtx, enum machine_mode, + int, int, int); + +extern rtx split_small_symbolic_operand (rtx); + +extern void get_aligned_mem (rtx, rtx *, rtx *); +extern rtx get_unaligned_address (rtx); +extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); +extern enum reg_class alpha_preferred_reload_class (rtx, enum reg_class); + +extern void alpha_set_memflags (rtx, rtx); +extern bool alpha_split_const_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov_nobwx (enum machine_mode, rtx *); +extern void alpha_expand_movmisalign (enum machine_mode, rtx *); +extern void alpha_emit_floatuns (rtx[]); +extern rtx alpha_emit_conditional_move (rtx, enum machine_mode); +extern void alpha_split_tmode_pair (rtx[], enum machine_mode, bool); +extern void alpha_split_tfmode_frobsign (rtx[], rtx (*)(rtx, rtx, rtx)); +extern void alpha_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT, int); +extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern int alpha_expand_block_move (rtx []); +extern int alpha_expand_block_clear (rtx []); +extern rtx alpha_expand_zap_mask (HOST_WIDE_INT); +extern void alpha_expand_builtin_vector_binop (rtx (*)(rtx, rtx, rtx), + enum machine_mode, + rtx, rtx, rtx); +extern void alpha_expand_builtin_establish_vms_condition_handler (rtx, rtx); +extern void alpha_expand_builtin_revert_vms_condition_handler (rtx); + +extern rtx alpha_return_addr (int, rtx); +extern rtx alpha_gp_save_rtx (void); +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern void 
alpha_initialize_trampoline (rtx, rtx, rtx, int, int, int); + +extern rtx alpha_va_arg (tree, tree); +extern rtx function_value (const_tree, const_tree, enum machine_mode); + +extern void alpha_start_function (FILE *, const char *, tree); +extern void alpha_end_function (FILE *, const char *, tree); + +extern int alpha_find_lo_sum_using_gp (rtx); + +#ifdef REAL_VALUE_TYPE +extern int check_float_value (enum machine_mode, REAL_VALUE_TYPE *, int); +#endif + +#ifdef RTX_CODE +extern void alpha_emit_conditional_branch (rtx[], enum machine_mode); +extern bool alpha_emit_setcc (rtx[], enum machine_mode); +extern int alpha_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); +extern void alpha_emit_xfloating_arith (enum rtx_code, rtx[]); +extern void alpha_emit_xfloating_cvt (enum rtx_code, rtx[]); +extern void alpha_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, + enum memmodel); +extern void alpha_split_compare_and_swap (rtx op[]); +extern void alpha_expand_compare_and_swap_12 (rtx op[]); +extern void alpha_split_compare_and_swap_12 (rtx op[]); +extern void alpha_split_atomic_exchange (rtx op[]); +extern void alpha_expand_atomic_exchange_12 (rtx op[]); +extern void alpha_split_atomic_exchange_12 (rtx op[]); +#endif + +extern rtx alpha_use_linkage (rtx, bool, bool); + +#if TARGET_ABI_OPEN_VMS +extern enum avms_arg_type alpha_arg_type (enum machine_mode); +extern rtx alpha_arg_info_reg_val (CUMULATIVE_ARGS); +extern void avms_asm_output_external (FILE *, tree, const char *); +extern void vms_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern HOST_WIDE_INT alpha_vms_initial_elimination_offset (unsigned int, + unsigned int); +#endif + +extern rtx unicosmk_add_call_info_word (rtx); + +extern int some_small_symbolic_operand_int (rtx *, void *); +extern int tls_symbolic_operand_1 (rtx, int, int); +extern rtx resolve_reload_operand (rtx); diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c new file mode 100644 index 000000000..df4cc1b1c --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -0,0 +1,9898 @@ +/* Subroutines used for code generation on the DEC Alpha. + Copyright (C) 1992-2014 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "common/common-target.h" +#include "debug.h" +#include "langhooks.h" +#include "splay-tree.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-ssa.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "tree-stdarg.h" +#include "tm-constrs.h" +#include "df.h" +#include "libfuncs.h" +#include "opts.h" +#include "params.h" + +/* Specify which cpu to schedule for. */ +enum processor_type alpha_tune; + +/* Which cpu we're generating code for. */ +enum processor_type alpha_cpu; + +static const char * const alpha_cpu_name[] = +{ + "ev4", "ev5", "ev6" +}; + +/* Specify how accurate floating-point traps need to be. */ + +enum alpha_trap_precision alpha_tp; + +/* Specify the floating-point rounding mode. */ + +enum alpha_fp_rounding_mode alpha_fprm; + +/* Specify which things cause traps. */ + +enum alpha_fp_trap_mode alpha_fptm; + +/* Nonzero if inside of a function, because the Alpha asm can't + handle .files inside of functions. */ + +static int inside_function = FALSE; + +/* The number of cycles of latency we should assume on memory reads. */ + +int alpha_memory_latency = 3; + +/* Whether the function needs the GP. */ + +static int alpha_function_needs_gp; + +/* The assembler name of the current function. */ + +static const char *alpha_fnname; + +/* The next explicit relocation sequence number. */ +extern GTY(()) int alpha_next_sequence_number; +int alpha_next_sequence_number = 1; + +/* The literal and gpdisp sequence numbers for this insn, as printed + by %# and %* respectively. */ +extern GTY(()) int alpha_this_literal_sequence_number; +extern GTY(()) int alpha_this_gpdisp_sequence_number; +int alpha_this_literal_sequence_number; +int alpha_this_gpdisp_sequence_number; + +/* Costs of various operations on the different architectures. 
*/ + +struct alpha_rtx_cost_data +{ + unsigned char fp_add; + unsigned char fp_mult; + unsigned char fp_div_sf; + unsigned char fp_div_df; + unsigned char int_mult_si; + unsigned char int_mult_di; + unsigned char int_shift; + unsigned char int_cmov; + unsigned short int_div; +}; + +static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] = +{ + { /* EV4 */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (34), /* fp_div_sf */ + COSTS_N_INSNS (63), /* fp_div_df */ + COSTS_N_INSNS (23), /* int_mult_si */ + COSTS_N_INSNS (23), /* int_mult_di */ + COSTS_N_INSNS (2), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (97), /* int_div */ + }, + { /* EV5 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (15), /* fp_div_sf */ + COSTS_N_INSNS (22), /* fp_div_df */ + COSTS_N_INSNS (8), /* int_mult_si */ + COSTS_N_INSNS (12), /* int_mult_di */ + COSTS_N_INSNS (1) + 1, /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (83), /* int_div */ + }, + { /* EV6 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (15), /* fp_div_df */ + COSTS_N_INSNS (7), /* int_mult_si */ + COSTS_N_INSNS (7), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (86), /* int_div */ + }, +}; + +/* Similar but tuned for code size instead of execution latency. The + extra +N is fractional cost tuning based on latency. It's used to + encourage use of cheaper insns like shift, but only if there's just + one of them. */ + +static struct alpha_rtx_cost_data const alpha_rtx_cost_size = +{ + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1) + 1, /* fp_div_df */ + COSTS_N_INSNS (1) + 1, /* int_mult_si */ + COSTS_N_INSNS (1) + 2, /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (6), /* int_div */ +}; + +/* Get the number of args of a function in one of two ways. */ +#if TARGET_ABI_OPEN_VMS +#define NUM_ARGS crtl->args.info.num_args +#else +#define NUM_ARGS crtl->args.info +#endif + +#define REG_PV 27 +#define REG_RA 26 + +/* Declarations of static functions. */ +static struct machine_function *alpha_init_machine_status (void); +static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx); + +#if TARGET_ABI_OPEN_VMS +static void alpha_write_linkage (FILE *, const char *); +static bool vms_valid_pointer_mode (enum machine_mode); +#else +#define vms_patch_builtins() gcc_unreachable() +#endif + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +alpha_mangle_type (const_tree type) +{ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Parse target option strings. */ + +static void +alpha_option_override (void) +{ + static const struct cpu_table { + const char *const name; + const enum processor_type processor; + const int flags; + const unsigned short line_size; /* in bytes */ + const unsigned short l1_size; /* in kb. */ + const unsigned short l2_size; /* in kb. */ + } cpu_table[] = { + /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches. + EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45 + had 64k to 8M 8-byte direct Bcache. 
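A brief sketch of how the fractional "+ 1" / "+ 2" entries in alpha_rtx_cost_size above work, assuming the usual rtl.h definition of the macro:

  /* rtl.h scales instruction counts by 4.  */
  #define COSTS_N_INSNS(N) ((N) * 4)

  /* So EV5's int_shift entry, COSTS_N_INSNS (1) + 1, evaluates to 5,
     i.e. "1.25 instructions": slightly dearer than a plain one-insn op,
     which is how the fractional latency tuning mentioned above is encoded.  */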
*/ + { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, + { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, + { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 }, + + /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2, + and 1M to 16M 64 byte L3 (not modeled). + PCA56 had 16k 64-byte cache; PCA57 had 32k Icache. + PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */ + { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 }, + { "21164", PROCESSOR_EV5, 0, 32, 8, 96 }, + { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, + { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, + { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + + /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */ + { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, + { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, + { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, + 64, 64, 16*1024 }, + { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, + 64, 64, 16*1024 } + }; + + int const ct_size = ARRAY_SIZE (cpu_table); + int line_size = 0, l1_size = 0, l2_size = 0; + int i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* Default to full IEEE compliance mode for Go language. */ + if (strcmp (lang_hooks.name, "GNU Go") == 0 + && !(target_flags_explicit & MASK_IEEE)) + target_flags |= MASK_IEEE; + + alpha_fprm = ALPHA_FPRM_NORM; + alpha_tp = ALPHA_TP_PROG; + alpha_fptm = ALPHA_FPTM_N; + + if (TARGET_IEEE) + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SU; + } + if (TARGET_IEEE_WITH_INEXACT) + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SUI; + } + + if (alpha_tp_string) + { + if (! strcmp (alpha_tp_string, "p")) + alpha_tp = ALPHA_TP_PROG; + else if (! strcmp (alpha_tp_string, "f")) + alpha_tp = ALPHA_TP_FUNC; + else if (! strcmp (alpha_tp_string, "i")) + alpha_tp = ALPHA_TP_INSN; + else + error ("bad value %qs for -mtrap-precision switch", alpha_tp_string); + } + + if (alpha_fprm_string) + { + if (! strcmp (alpha_fprm_string, "n")) + alpha_fprm = ALPHA_FPRM_NORM; + else if (! strcmp (alpha_fprm_string, "m")) + alpha_fprm = ALPHA_FPRM_MINF; + else if (! strcmp (alpha_fprm_string, "c")) + alpha_fprm = ALPHA_FPRM_CHOP; + else if (! strcmp (alpha_fprm_string,"d")) + alpha_fprm = ALPHA_FPRM_DYN; + else + error ("bad value %qs for -mfp-rounding-mode switch", + alpha_fprm_string); + } + + if (alpha_fptm_string) + { + if (strcmp (alpha_fptm_string, "n") == 0) + alpha_fptm = ALPHA_FPTM_N; + else if (strcmp (alpha_fptm_string, "u") == 0) + alpha_fptm = ALPHA_FPTM_U; + else if (strcmp (alpha_fptm_string, "su") == 0) + alpha_fptm = ALPHA_FPTM_SU; + else if (strcmp (alpha_fptm_string, "sui") == 0) + alpha_fptm = ALPHA_FPTM_SUI; + else + error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string); + } + + if (alpha_cpu_string) + { + for (i = 0; i < ct_size; i++) + if (! strcmp (alpha_cpu_string, cpu_table [i].name)) + { + alpha_tune = alpha_cpu = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX); + target_flags |= cpu_table[i].flags; + break; + } + if (i == ct_size) + error ("bad value %qs for -mcpu switch", alpha_cpu_string); + } + + if (alpha_tune_string) + { + for (i = 0; i < ct_size; i++) + if (! 
strcmp (alpha_tune_string, cpu_table [i].name)) + { + alpha_tune = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + break; + } + if (i == ct_size) + error ("bad value %qs for -mtune switch", alpha_tune_string); + } + + if (line_size) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (l1_size) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (l2_size) + maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Do some sanity checks on the above options. */ + + if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI) + && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6) + { + warning (0, "fp software completion requires -mtrap-precision=i"); + alpha_tp = ALPHA_TP_INSN; + } + + if (alpha_cpu == PROCESSOR_EV6) + { + /* Except for EV6 pass 1 (not released), we always have precise + arithmetic traps. Which means we can do software completion + without minding trap shadows. */ + alpha_tp = ALPHA_TP_PROG; + } + + if (TARGET_FLOAT_VAX) + { + if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN) + { + warning (0, "rounding mode not supported for VAX floats"); + alpha_fprm = ALPHA_FPRM_NORM; + } + if (alpha_fptm == ALPHA_FPTM_SUI) + { + warning (0, "trap mode not supported for VAX floats"); + alpha_fptm = ALPHA_FPTM_SU; + } + if (target_flags_explicit & MASK_LONG_DOUBLE_128) + warning (0, "128-bit long double not supported for VAX floats"); + target_flags &= ~MASK_LONG_DOUBLE_128; + } + + { + char *end; + int lat; + + if (!alpha_mlat_string) + alpha_mlat_string = "L1"; + + if (ISDIGIT ((unsigned char)alpha_mlat_string[0]) + && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0')) + ; + else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l') + && ISDIGIT ((unsigned char)alpha_mlat_string[1]) + && alpha_mlat_string[2] == '\0') + { + static int const cache_latency[][4] = + { + { 3, 30, -1 }, /* ev4 -- Bcache is a guess */ + { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ + { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */ + }; + + lat = alpha_mlat_string[1] - '0'; + if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1) + { + warning (0, "L%d cache latency unknown for %s", + lat, alpha_cpu_name[alpha_tune]); + lat = 3; + } + else + lat = cache_latency[alpha_tune][lat-1]; + } + else if (! strcmp (alpha_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. */ + lat = 150; + } + else + { + warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string); + lat = 3; + } + + alpha_memory_latency = lat; + } + + /* Default the definition of "small data" to 8 bytes. */ + if (!global_options_set.x_g_switch_value) + g_switch_value = 8; + + /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */ + if (flag_pic == 1) + target_flags |= MASK_SMALL_DATA; + else if (flag_pic == 2) + target_flags &= ~MASK_SMALL_DATA; + + /* Align labels and loops for optimal branching. */ + /* ??? Kludge these by not doing anything if we don't optimize. 
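Reading the -mmemory-latency handling above off the cache_latency table: with -mtune=ev5 (row { 2, 12, 38 }), -mmemory-latency=L2 sets alpha_memory_latency to 12 cycles, -mmemory-latency=main uses the fixed 150-cycle guess, and a bare number such as -mmemory-latency=40 is taken verbatim.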
*/ + if (optimize > 0) + { + if (align_loops <= 0) + align_loops = 16; + if (align_jumps <= 0) + align_jumps = 16; + } + if (align_functions <= 0) + align_functions = 16; + + /* Register variables and functions with the garbage collector. */ + + /* Set up function hooks. */ + init_machine_status = alpha_init_machine_status; + + /* Tell the compiler when we're using VAX floating point. */ + if (TARGET_FLOAT_VAX) + { + REAL_MODE_FORMAT (SFmode) = &vax_f_format; + REAL_MODE_FORMAT (DFmode) = &vax_g_format; + REAL_MODE_FORMAT (TFmode) = NULL; + } + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif +} + +/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ + +int +zap_mask (HOST_WIDE_INT value) +{ + int i; + + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if ((value & 0xff) != 0 && (value & 0xff) != 0xff) + return 0; + + return 1; +} + +/* Return true if OP is valid for a particular TLS relocation. + We are already guaranteed that OP is a CONST. */ + +int +tls_symbolic_operand_1 (rtx op, int size, int unspec) +{ + op = XEXP (op, 0); + + if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) + return 0; + op = XVECEXP (op, 0, 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + switch (SYMBOL_REF_TLS_MODEL (op)) + { + case TLS_MODEL_LOCAL_DYNAMIC: + return unspec == UNSPEC_DTPREL && size == alpha_tls_size; + case TLS_MODEL_INITIAL_EXEC: + return unspec == UNSPEC_TPREL && size == 64; + case TLS_MODEL_LOCAL_EXEC: + return unspec == UNSPEC_TPREL && size == alpha_tls_size; + default: + gcc_unreachable (); + } +} + +/* Used by aligned_memory_operand and unaligned_memory_operand to + resolve what reload is going to do with OP if it's a register. */ + +rtx +resolve_reload_operand (rtx op) +{ + if (reload_in_progress) + { + rtx tmp = op; + if (GET_CODE (tmp) == SUBREG) + tmp = SUBREG_REG (tmp); + if (REG_P (tmp) + && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) + { + op = reg_equiv_memory_loc (REGNO (tmp)); + if (op == 0) + return 0; + } + } + return op; +} + +/* The scalar modes supported differs from the default check-what-c-supports + version in that sometimes TFmode is available even when long double + indicates only DFmode. */ + +static bool +alpha_scalar_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case QImode: + case HImode: + case SImode: + case DImode: + case TImode: /* via optabs.c */ + return true; + + case SFmode: + case DFmode: + return true; + + case TFmode: + return TARGET_HAS_XFLOATING_LIBS; + + default: + return false; + } +} + +/* Alpha implements a couple of integer vector mode operations when + TARGET_MAX is enabled. We do not check TARGET_MAX here, however, + which allows the vectorizer to operate on e.g. move instructions, + or when expand_vector_operations can do something useful. */ + +static bool +alpha_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V8QImode || mode == V4HImode || mode == V2SImode; +} + +/* Return 1 if this function can directly return via $26. */ + +int +direct_return (void) +{ + return (TARGET_ABI_OSF + && reload_completed + && alpha_sa_size () == 0 + && get_frame_size () == 0 + && crtl->outgoing_args_size == 0 + && crtl->args.pretend_args_size == 0); +} + +/* Return the TLS model to use for SYMBOL. 
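To make zap_mask above concrete, here is a standalone restatement of the same byte test (hypothetical helper name, 64-bit host sketch only):

  #include <stdint.h>

  /* Mirror of zap_mask: every byte of the value must be 0x00 or 0xff.  */
  static int is_zap_mask (uint64_t v)
  {
    for (int i = 0; i < 8; i++, v >>= 8)
      if ((v & 0xff) != 0 && (v & 0xff) != 0xff)
        return 0;
    return 1;
  }

  /* is_zap_mask (0x00000000ffffffff) == 1   low-half mask
     is_zap_mask (0xff00ff00ff00ff00) == 1   alternating bytes
     is_zap_mask (0x0000000000123456) == 0   partial bytes present  */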
*/ + +static enum tls_model +tls_symbolic_operand_type (rtx symbol) +{ + enum tls_model model; + + if (GET_CODE (symbol) != SYMBOL_REF) + return TLS_MODEL_NONE; + model = SYMBOL_REF_TLS_MODEL (symbol); + + /* Local-exec with a 64-bit size is the same code as initial-exec. */ + if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64) + model = TLS_MODEL_INITIAL_EXEC; + + return model; +} + +/* Return true if the function DECL will share the same GP as any + function in the current unit of translation. */ + +static bool +decl_has_samegp (const_tree decl) +{ + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (!(*targetm.binds_local_p) (decl)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT. */ + /* ??? Irritatingly, static functions not yet emitted are still + marked "external". Apply this to non-static functions only. */ + return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); +} + +/* Return true if EXP should be placed in the small data section. */ + +static bool +alpha_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never in the small data area. Duh. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + if (strcmp (section, ".sdata") == 0 + || strcmp (section, ".sbss") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= g_switch_value) + return true; + } + + return false; +} + +#if TARGET_ABI_OPEN_VMS +static bool +vms_valid_pointer_mode (enum machine_mode mode) +{ + return (mode == SImode || mode == DImode); +} + +static bool +alpha_linkage_symbol_p (const char *symname) +{ + int symlen = strlen (symname); + + if (symlen > 4) + return strcmp (&symname [symlen - 4], "..lk") == 0; + + return false; +} + +#define LINKAGE_SYMBOL_REF_P(X) \ + ((GET_CODE (X) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (X, 0))) \ + || (GET_CODE (X) == CONST \ + && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0)))) +#endif + +/* legitimate_address_p recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the + machine mode for the MEM expression that wants to use this address. + + For Alpha, we have either a constant address or the sum of a + register and a constant address, or just a register. For DImode, + any of those forms can be surrounded with an AND that clear the + low-order three bits; this is an "unaligned" access. */ + +static bool +alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + /* If this is an ldq_u type address, discard the outer AND. 
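Concretely, the shapes alpha_legitimate_address_p above accepts include a bare register, (plus reg (const_int 64)), a plain +/-32K constant address, and -- for DImode only -- the unaligned form (and (plus reg (const_int 6)) (const_int -8)) used by ldq_u; with explicit relocations, small-data symbols and (lo_sum reg sym) are additionally allowed for word-sized and smaller modes.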
*/ + if (mode == DImode + && GET_CODE (x) == AND + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == -8) + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Unadorned general registers are valid. */ + if (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x))) + return true; + + /* Constant addresses (i.e. +/- 32k) are valid. */ + if (CONSTANT_ADDRESS_P (x)) + return true; + +#if TARGET_ABI_OPEN_VMS + if (LINKAGE_SYMBOL_REF_P (x)) + return true; +#endif + + /* Register plus a small constant offset is valid. */ + if (GET_CODE (x) == PLUS) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + if (REG_P (x)) + { + if (! strict + && NONSTRICT_REG_OK_FP_BASE_P (x) + && CONST_INT_P (ofs)) + return true; + if ((strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)) + && CONSTANT_ADDRESS_P (ofs)) + return true; + } + } + + /* If we're managing explicit relocations, LO_SUM is valid, as are small + data symbols. Avoid explicit relocations of modes larger than word + mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ + else if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (small_symbolic_operand (x, Pmode)) + return true; + + if (GET_CODE (x) == LO_SUM) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Must have a valid base register. */ + if (! (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)))) + return false; + + /* The symbol must be local. */ + if (local_symbolic_operand (ofs, Pmode) + || dtp32_symbolic_operand (ofs, Pmode) + || tp32_symbolic_operand (ofs, Pmode)) + return true; + } + } + + return false; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ + +static rtx +alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode) +{ + HOST_WIDE_INT addend; + + /* If the address is (plus reg const_int) and the CONST_INT is not a + valid offset, compute the high part of the constant and add it to + the register. Then our address is (plus temp low-part-const). */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && ! CONSTANT_ADDRESS_P (XEXP (x, 1))) + { + addend = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + goto split_addend; + } + + /* If the address is (const (plus FOO const_int)), find the low-order + part of the CONST_INT. Then load FOO plus any high-order part of the + CONST_INT into a register. Our address is (plus reg low-part-const). + This is done to reduce the number of GOT entries. 
*/ + if (can_create_pseudo_p () + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + addend = INTVAL (XEXP (XEXP (x, 0), 1)); + x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); + goto split_addend; + } + + /* If we have a (plus reg const), emit the load as in (2), then add + the two registers, and finally generate (plus reg low-part-const) as + our address. */ + if (can_create_pseudo_p () + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) + { + addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); + x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), + XEXP (XEXP (XEXP (x, 1), 0), 0), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + goto split_addend; + } + + /* If this is a local symbol, split the address into HIGH/LO_SUM parts. + Avoid modes larger than word mode since i.e. $LC0+8($1) can fold + around +/- 32k offset. */ + if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && symbolic_operand (x, Pmode)) + { + rtx r0, r16, eqv, tga, tp, insn, dest, seq; + + switch (tls_symbolic_operand_type (x)) + { + case TLS_MODEL_NONE: + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + dest = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); + insn = gen_call_value_osf_tlsgd (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + emit_libcall_block (insn, dest, r0, x); + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + scratch = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); + insn = gen_call_value_osf_tlsldm (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDM_CALL); + emit_libcall_block (insn, scratch, r0, eqv); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + + if (alpha_tls_size == 64) + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, dest, eqv)); + emit_insn (gen_adddi3 (dest, dest, scratch)); + return dest; + } + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, scratch, insn); + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, insn)); + } + return gen_rtx_LO_SUM (Pmode, scratch, eqv); + + case TLS_MODEL_INITIAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + scratch = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv)); + emit_insn (gen_adddi3 (dest, tp, scratch)); + return dest; + + case TLS_MODEL_LOCAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + + emit_insn 
(gen_get_thread_pointerdi (tp)); + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, tp, insn); + tp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, tp, insn)); + } + return gen_rtx_LO_SUM (Pmode, tp, eqv); + + default: + gcc_unreachable (); + } + + if (local_symbolic_operand (x, Pmode)) + { + if (small_symbolic_operand (x, Pmode)) + return x; + else + { + if (can_create_pseudo_p ()) + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, + gen_rtx_HIGH (Pmode, x))); + return gen_rtx_LO_SUM (Pmode, scratch, x); + } + } + } + + return NULL; + + split_addend: + { + HOST_WIDE_INT low, high; + + low = ((addend & 0xffff) ^ 0x8000) - 0x8000; + addend -= low; + high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; + addend -= high; + + if (addend) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + if (high) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + + return plus_constant (Pmode, x, low); + } +} + + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. Return X or the new, valid address. */ + +static rtx +alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode); + return new_x ? new_x : x; +} + +/* Return true if ADDR has an effect that depends on the machine mode it + is used for. On the Alpha this is true only for the unaligned modes. + We can simplify the test since we know that the address must be valid. */ + +static bool +alpha_mode_dependent_address_p (const_rtx addr, + addr_space_t as ATTRIBUTE_UNUSED) +{ + return GET_CODE (addr) == AND; +} + +/* Primarily this is required for TLS symbols, but given that our move + patterns *ought* to be able to handle any symbol at any time, we + should never be spilling symbolic operands to the constant pool, ever. */ + +static bool +alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + enum rtx_code code = GET_CODE (x); + return code == SYMBOL_REF || code == LABEL_REF || code == CONST; +} + +/* We do not allow indirect calls to be optimized into sibling calls, nor + can we allow a call to a function with a different GP to be optimized + into a sibcall. */ + +static bool +alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Can't do indirect tail calls, since we don't know if the target + uses the same GP. */ + if (!decl) + return false; + + /* Otherwise, we can make a tail call if the target function shares + the same GP. */ + return decl_has_samegp (decl); +} + +int +some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + return small_symbolic_operand (x, Pmode) != 0; +} + +static int +split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + if (small_symbolic_operand (x, Pmode)) + { + x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); + *px = x; + return -1; + } + + return 0; +} + +rtx +split_small_symbolic_operand (rtx x) +{ + x = copy_insn (x); + for_each_rtx (&x, split_small_symbolic_operand_1, NULL); + return x; +} + +/* Indicate that INSN cannot be duplicated. 
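The 16-bit splitting idiom used in split_addend above (and again in alpha_legitimize_reload_address below) is easiest to see with numbers; a minimal host-side sketch, with hypothetical helper names:

  #include <stdint.h>

  /* ((a & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits,
     which is exactly what an lda displacement can carry.  */
  static int64_t lo16 (int64_t a) { return ((a & 0xffff) ^ 0x8000) - 0x8000; }

  int main (void)
  {
    int64_t addend = 0x18000;            /* too big for one 16-bit displacement */
    int64_t low    = lo16 (addend);      /* -0x8000 */
    int64_t rest   = addend - low;       /*  0x20000 */
    int64_t high   = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;  /* 0x20000 */

    /* The address is rebuilt as (base + 0x20000) + (-0x8000): one ldah of
       high >> 16 == 2, with the remaining -0x8000 left as the memory insn's
       16-bit displacement.  */
    return !(high + low == addend);
  }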
This is true for any insn + that we've marked with gpdisp relocs, since those have to stay in + 1-1 correspondence with one another. + + Technically we could copy them if we could set up a mapping from one + sequence number to another, across the set of insns to be duplicated. + This seems overly complicated and error-prone since interblock motion + from sched-ebb could move one of the pair of insns to a different block. + + Also cannot allow jsr insns to be duplicated. If they throw exceptions, + then they'll be in a different block from their ldgp. Which could lead + the bb reorder code to think that it would be ok to copy just the block + containing the call and branch to the block containing the ldgp. */ + +static bool +alpha_cannot_copy_insn_p (rtx insn) +{ + if (!reload_completed || !TARGET_EXPLICIT_RELOCS) + return false; + if (recog_memoized (insn) >= 0) + return get_attr_cannot_copy (insn); + else + return false; +} + + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +alpha_legitimize_reload_address (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by + splitting the addend across an ldah and the mem insn. This + cuts number of extra insns needed from 3 to 1. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) + && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow. */ + if (high + low != val) + return NULL_RTX; + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + enum machine_mode mode = GET_MODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + const struct alpha_rtx_cost_data *cost_data; + + if (!speed) + cost_data = &alpha_rtx_cost_size; + else + cost_data = &alpha_rtx_cost_data[alpha_tune]; + + switch (code) + { + case CONST_INT: + /* If this is an 8-bit constant, return zero since it can be used + nearly anywhere with no cost. If it is a valid operand for an + ADD or AND, likewise return 0 if we know it will be used in that + context. Otherwise, return 2 since it might be used there later. 
+ All other constants take at least two insns. */ + if (INTVAL (x) >= 0 && INTVAL (x) < 256) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + *total = 0; + else if ((outer_code == PLUS && add_operand (x, VOIDmode)) + || (outer_code == AND && and_operand (x, VOIDmode))) + *total = 0; + else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) + *total = 2; + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (outer_code != MEM); + else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (1 + (outer_code != MEM)); + else if (tls_symbolic_operand_type (x)) + /* Estimate of cost for call_pal rduniq. */ + /* ??? How many insns do we emit here? More than one... */ + *total = COSTS_N_INSNS (15); + else + /* Otherwise we do a load from the GOT. */ + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case HIGH: + /* This is effectively an add_operand. */ + *total = 2; + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = cost_data->fp_add; + else if (GET_CODE (XEXP (x, 0)) == MULT + && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), + (enum rtx_code) outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), + (enum rtx_code) outer_code, opno, speed) + + COSTS_N_INSNS (1)); + return true; + } + return false; + + case MULT: + if (float_mode_p) + *total = cost_data->fp_mult; + else if (mode == DImode) + *total = cost_data->int_mult_di; + else + *total = cost_data->int_mult_si; + return false; + + case ASHIFT: + if (CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) <= 3) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ASHIFTRT: + case LSHIFTRT: + *total = cost_data->int_shift; + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = cost_data->fp_add; + else + *total = cost_data->int_cmov; + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (!float_mode_p) + *total = cost_data->int_div; + else if (mode == SFmode) + *total = cost_data->fp_div_sf; + else + *total = cost_data->fp_div_df; + return false; + + case MEM: + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case NEG: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1) + cost_data->int_cmov; + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + case FLOAT_TRUNCATE: + *total = cost_data->fp_add; + return false; + + case FLOAT_EXTEND: + if (MEM_P (XEXP (x, 0))) + *total = 0; + else + *total = cost_data->fp_add; + return false; + + default: + return false; + } +} + +/* REF is an alignable memory location. Place an aligned SImode + reference into *PALIGNED_MEM and the number of bits to shift into + *PBITNUM. SCRATCH is a free register for use in reloading out + of range stack slots. */ + +void +get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) +{ + rtx base; + HOST_WIDE_INT disp, offset; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! 
memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + else + disp = 0; + + /* Find the byte offset within an aligned word. If the memory itself is + claimed to be aligned, believe it. Otherwise, aligned_memory_operand + will have examined the base register and determined it is aligned, and + thus displacements from it are naturally alignable. */ + if (MEM_ALIGN (ref) >= 32) + offset = 0; + else + offset = disp & 3; + + /* The location should not cross aligned word boundary. */ + gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) + <= GET_MODE_SIZE (SImode)); + + /* Access the entire aligned word. */ + *paligned_mem = widen_memory_access (ref, SImode, -offset); + + /* Convert the byte offset within the word to a bit offset. */ + offset *= BITS_PER_UNIT; + *pbitnum = GEN_INT (offset); +} + +/* Similar, but just get the address. Handle the two reload cases. + Add EXTRA_OFFSET to the address we return. */ + +rtx +get_unaligned_address (rtx ref) +{ + rtx base; + HOST_WIDE_INT offset = 0; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + + return plus_constant (Pmode, base, offset); +} + +/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. + X is always returned in a register. */ + +rtx +get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) +{ + if (GET_CODE (addr) == PLUS) + { + ofs += INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), + NULL_RTX, 1, OPTAB_LIB_WIDEN); +} + +/* On the Alpha, all (non-symbolic) constants except zero go into + a floating-point register via memory. Note that we cannot + return anything that is not a subset of RCLASS, and that some + symbolic constants cannot be dropped to memory. */ + +enum reg_class +alpha_preferred_reload_class(rtx x, enum reg_class rclass) +{ + /* Zero is present in any register class. */ + if (x == CONST0_RTX (GET_MODE (x))) + return rclass; + + /* These sorts of constants we can easily drop to memory. */ + if (CONST_INT_P (x) + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR) + { + if (rclass == FLOAT_REGS) + return NO_REGS; + if (rclass == ALL_REGS) + return GENERAL_REGS; + return rclass; + } + + /* All other kinds of constants should not (and in the case of HIGH + cannot) be dropped to memory -- instead we use a GENERAL_REGS + secondary reload. */ + if (CONSTANT_P (x)) + return (rclass == ALL_REGS ? GENERAL_REGS : rclass); + + return rclass; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Loading and storing HImode or QImode values to and from memory + usually requires a scratch register. 
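A worked case for get_aligned_mem above: a HImode reference at base+6 whose MEM_ALIGN is below 32 gives offset = 6 & 3 = 2, so *paligned_mem becomes an SImode access at base+4 (widen_memory_access with -offset) and *pbitnum is 16 -- the halfword is then picked out of bits 16..31 of that aligned longword.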
*/ + if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) + { + if (any_memory_operand (x, mode)) + { + if (in_p) + { + if (!aligned_memory_operand (x, mode)) + sri->icode = direct_optab_handler (reload_in_optab, mode); + } + else + sri->icode = direct_optab_handler (reload_out_optab, mode); + return NO_REGS; + } + } + + /* We also cannot do integral arithmetic into FP regs, as might result + from register elimination into a DImode fp register. */ + if (rclass == FLOAT_REGS) + { + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + return GENERAL_REGS; + if (in_p && INTEGRAL_MODE_P (mode) + && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x)) + return GENERAL_REGS; + } + + return NO_REGS; +} + +/* Subfunction of the following function. Update the flags of any MEM + found in part of X. */ + +static int +alpha_set_memflags_1 (rtx *xp, void *data) +{ + rtx x = *xp, orig = (rtx) data; + + if (!MEM_P (x)) + return 0; + + MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig); + MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig); + MEM_READONLY_P (x) = MEM_READONLY_P (orig); + + /* Sadly, we cannot use alias sets because the extra aliasing + produced by the AND interferes. Given that two-byte quantities + are the only thing we would be able to differentiate anyway, + there does not seem to be any point in convoluting the early + out of the alias check. */ + + return -1; +} + +/* Given SEQ, which is an INSN list, look for any MEMs in either + a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and + volatile flags from REF into each of the MEMs found. If REF is not + a MEM, don't do anything. */ + +void +alpha_set_memflags (rtx seq, rtx ref) +{ + rtx insn; + + if (!MEM_P (ref)) + return; + + /* This is only called from alpha.md, after having had something + generated from one of the insn patterns. So if everything is + zero, the pattern is already up-to-date. */ + if (!MEM_VOLATILE_P (ref) + && !MEM_NOTRAP_P (ref) + && !MEM_READONLY_P (ref)) + return; + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref); + else + gcc_unreachable (); +} + +static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT, + int, bool); + +/* Internal routine for alpha_emit_set_const to check for N or below insns. + If NO_OUTPUT is true, then we only check to see if N insns are possible, + and return pc_rtx if successful. */ + +static rtx +alpha_emit_set_const_1 (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + HOST_WIDE_INT new_const; + int i, bits; + /* Use a pseudo if highly optimizing and still generating RTL. */ + rtx subtarget + = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target); + rtx temp, insn; + + /* If this is a sign-extended 32-bit constant, we can do this in at most + three insns, so do it if we have enough insns left. We always have + a sign-extended 32-bit constant when compiling on a narrow machine. */ + + if (HOST_BITS_PER_WIDE_INT != 64 + || c >> 31 == -1 || c >> 31 == 0) + { + HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT tmp1 = c - low; + HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT extra = 0; + + /* If HIGH will be interpreted as negative but the constant is + positive, we must adjust it to do two ldha insns. 
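A worked case for the adjustment just described: c = 0x7fff8000 gives low = -0x8000, tmp1 = 0x80000000 and an initial high of -0x8000 -- negative even though c is positive -- so extra becomes 0x4000 and high is recomputed as 0x4000. The constant is then built from two ldah-style adds of 0x4000 plus an lda of -0x8000: 0x40000000 + 0x40000000 - 0x8000 == 0x7fff8000.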
*/ + + if ((high & 0x8000) != 0 && c >= 0) + { + extra = 0x4000; + tmp1 -= 0x40000000; + high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); + } + + if (c == low || (low == 0 && extra == 0)) + { + /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) + but that meant that we can't handle INT_MIN on 32-bit machines + (like NT/Alpha), because we recurse indefinitely through + emit_move_insn to gen_movdi. So instead, since we know exactly + what we want, create it explicitly. */ + + if (no_output) + return pc_rtx; + if (target == NULL) + target = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c))); + return target; + } + else if (n >= 2 + (extra != 0)) + { + if (no_output) + return pc_rtx; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16))); + temp = target; + } + else + temp = copy_to_suggested_reg (GEN_INT (high << 16), + subtarget, mode); + + /* As of 2002-02-23, addsi3 is only available when not optimizing. + This means that if we go through expand_binop, we'll try to + generate extensions, etc, which will require new pseudos, which + will fail during some split phases. The SImode add patterns + still exist, but are not named. So build the insns by hand. */ + + if (extra != 0) + { + if (! subtarget) + subtarget = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); + insn = gen_rtx_SET (VOIDmode, subtarget, insn); + emit_insn (insn); + temp = subtarget; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + insn = gen_rtx_SET (VOIDmode, target, insn); + emit_insn (insn); + return target; + } + } + + /* If we couldn't do it that way, try some other methods. But if we have + no instructions left, don't bother. Likewise, if this is SImode and + we can't make pseudos, we can't do anything since the expand_binop + and expand_unop calls will widen and try to make pseudos. */ + + if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) + return 0; + + /* Next, see if we can load a related constant and then shift and possibly + negate it to get the constant we want. Try this once each increasing + numbers of insns. */ + + for (i = 1; i < n; i++) + { + /* First, see if minus some low bits, we've an easy load of + high bits. */ + + new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; + if (new_const != 0) + { + temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, add_optab, temp, GEN_INT (new_const), + target, 0, OPTAB_WIDEN); + } + } + + /* Next try complementing. */ + temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_unop (mode, one_cmpl_optab, temp, target, 0); + } + + /* Next try to form a constant and do a left shift. We can do this + if some low-order bits are zero; the exact_log2 call below tells + us that information. The bits we are shifting out could be any + value, but here we'll just try the 0- and sign-extended forms of + the constant. To try to increase the chance of having the same + constant in more than one insn, start at the highest number of + bits to shift, but try all possibilities in case a ZAPNOT will + be useful. 
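A worked case for the shift strategy just described: c = 0x123400000 has exact_log2 (c & -c) == 22 and c >> 22 == 0x48d, which a single lda can load, so the constant is recovered with one left shift of 22 -- two insns in total.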
*/ + + bits = exact_log2 (c & -c); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp && c < 0) + { + new_const = (unsigned HOST_WIDE_INT)c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + + /* Now try high-order zero bits. Here we try the shifted-in bits as + all zero and all ones. Be careful to avoid shifting outside the + mode and to avoid shifting outside the host wide int size. */ + /* On narrow hosts, don't shift a 1 into the high bit, since we'll + confuse the recursive call and set all of the high 32 bits. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64)); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), + target, 1, OPTAB_WIDEN); + } + } + + /* Now try high-order 1 bits. We get that with a sign-extension. + But one bit isn't enough here. Be careful to avoid shifting outside + the mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (~ c) - 2); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + } + +#if HOST_BITS_PER_WIDE_INT == 64 + /* Finally, see if can load a value into the target that is the same as the + constant except that all bytes that are 0 are changed to be 0xff. If we + can, then we can do a ZAPNOT to obtain the desired constant. */ + + new_const = c; + for (i = 0; i < 64; i += 8) + if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) + new_const |= (HOST_WIDE_INT) 0xff << i; + + /* We are only called for SImode and DImode. If this is SImode, ensure that + we are sign extended to a full word. */ + + if (mode == SImode) + new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (new_const != c) + { + temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const), + target, 0, OPTAB_WIDEN); + } + } +#endif + + return 0; +} + +/* Try to output insns to set TARGET equal to the constant C if it can be + done in less than N insns. Do all computations in MODE. Returns the place + where the output has been placed if it can be done and the insns have been + emitted. If it would take more than N insns, zero is returned and no + insns and emitted. 
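One case where the byte-fill fallback above can pay off: c = 0x0000ffffffff0000. Turning its zero bytes into 0xff gives new_const == -1, a one-insn load, and the AND mask c | ~new_const is just c itself -- a zap_mask pattern -- so the constant can be materialized as an all-ones load followed by a single zapnot-style AND keeping bytes 2..5.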
*/ + +static rtx +alpha_emit_set_const (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + enum machine_mode orig_mode = mode; + rtx orig_target = target; + rtx result = 0; + int i; + + /* If we can't make any pseudos, TARGET is an SImode hard register, we + can't load this constant in one insn, do this in DImode. */ + if (!can_create_pseudo_p () && mode == SImode + && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER) + { + result = alpha_emit_set_const_1 (target, mode, c, 1, no_output); + if (result) + return result; + + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) + { + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + + /* Try 1 insn, then 2, then up to N. */ + for (i = 1; i <= n; i++) + { + result = alpha_emit_set_const_1 (target, mode, c, i, no_output); + if (result) + { + rtx insn, set; + + if (no_output) + return result; + + insn = get_last_insn (); + set = single_set (insn); + if (! CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); + break; + } + } + + /* Allow for the case where we changed the mode of TARGET. */ + if (result) + { + if (result == target) + result = orig_target; + else if (mode != orig_mode) + result = gen_lowpart (orig_mode, result); + } + + return result; +} + +/* Having failed to find a 3 insn sequence in alpha_emit_set_const, + fall back to a straight forward decomposition. We do this to avoid + exponential run times encountered when looking for longer sequences + with alpha_emit_set_const. */ + +static rtx +alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2) +{ + HOST_WIDE_INT d1, d2, d3, d4; + + /* Decompose the entire word */ +#if HOST_BITS_PER_WIDE_INT >= 64 + gcc_assert (c2 == -(c1 < 0)); + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + c1 = (c1 - d2) >> 32; + d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d3; + d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d4); +#else + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d2); + c2 += (d2 < 0); + d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000; + c2 -= d3; + d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c2 == d4); +#endif + + /* Construct the high word */ + if (d4) + { + emit_move_insn (target, GEN_INT (d4)); + if (d3) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); + } + else + emit_move_insn (target, GEN_INT (d3)); + + /* Shift it into place */ + emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); + + /* Add in the low bits. */ + if (d2) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); + if (d1) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); + + return target; +} + +/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return + the low 64 bits. 
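For the straight-line decomposition in alpha_emit_set_long_const above, take c1 = 0x1234567890abcdef on a 64-bit host: d1 = -0x3211, d2 = -0x6f540000, d3 = 0x5679, d4 = 0x12340000, and the emitted sequence computes ((d4 + d3) << 32) + d2 + d1, rebuilding the value in five insns (ldah, lda, sll, ldah, lda).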
*/ + +static void +alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1) +{ + HOST_WIDE_INT i0, i1; + + if (GET_CODE (x) == CONST_VECTOR) + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + + + if (CONST_INT_P (x)) + { + i0 = INTVAL (x); + i1 = -(i0 < 0); + } + else if (HOST_BITS_PER_WIDE_INT >= 64) + { + i0 = CONST_DOUBLE_LOW (x); + i1 = -(i0 < 0); + } + else + { + i0 = CONST_DOUBLE_LOW (x); + i1 = CONST_DOUBLE_HIGH (x); + } + + *p0 = i0; + *p1 = i1; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which + we are willing to load the value into a register via a move pattern. + Normally this is all symbolic constants, integral constants that + take three or fewer instructions, and floating-point zero. */ + +bool +alpha_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + HOST_WIDE_INT i0, i1; + + switch (GET_CODE (x)) + { + case LABEL_REF: + case HIGH: + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + x = XEXP (XEXP (x, 0), 0); + else + return true; + + if (GET_CODE (x) != SYMBOL_REF) + return true; + + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + return SYMBOL_REF_TLS_MODEL (x) == 0; + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + return true; + if (FLOAT_MODE_P (mode)) + return false; + goto do_integer; + + case CONST_VECTOR: + if (x == CONST0_RTX (mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + if (GET_MODE_SIZE (mode) != 8) + return false; + goto do_integer; + + case CONST_INT: + do_integer: + if (TARGET_BUILD_CONSTANTS) + return true; + alpha_extract_integer (x, &i0, &i1); + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0)) + return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL; + return false; + + default: + return false; + } +} + +/* Operand 1 is known to be a constant, and should require more than one + instruction to load. Emit that multi-part load. */ + +bool +alpha_split_const_mov (enum machine_mode mode, rtx *operands) +{ + HOST_WIDE_INT i0, i1; + rtx temp = NULL_RTX; + + alpha_extract_integer (operands[1], &i0, &i1); + + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0)) + temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); + + if (!temp && TARGET_BUILD_CONSTANTS) + temp = alpha_emit_set_long_const (operands[0], i0, i1); + + if (temp) + { + if (!rtx_equal_p (operands[0], temp)) + emit_move_insn (operands[0], temp); + return true; + } + + return false; +} + +/* Expand a move instruction; return true if all work is done. + We don't handle non-bwx subword loads here. */ + +bool +alpha_expand_mov (enum machine_mode mode, rtx *operands) +{ + rtx tmp; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Allow legitimize_address to perform some simplifications. */ + if (mode == Pmode && symbolic_operand (operands[1], mode)) + { + tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); + if (tmp) + { + if (tmp == operands[0]) + return true; + operands[1] = tmp; + return false; + } + } + + /* Early out for non-constants and valid constants. */ + if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) + return false; + + /* Split large integers. 
*/ + if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE + || GET_CODE (operands[1]) == CONST_VECTOR) + { + if (alpha_split_const_mov (mode, operands)) + return true; + } + + /* Otherwise we've nothing left but to drop the thing to memory. */ + tmp = force_const_mem (mode, operands[1]); + + if (tmp == NULL_RTX) + return false; + + if (reload_in_progress) + { + emit_move_insn (operands[0], XEXP (tmp, 0)); + operands[1] = replace_equiv_address (tmp, operands[0]); + } + else + operands[1] = validize_mem (tmp); + return false; +} + +/* Expand a non-bwx QImode or HImode move instruction; + return true if all work is done. */ + +bool +alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands) +{ + rtx seq; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* Handle four memory cases, unaligned and aligned for either the input + or the output. The only case where we can be called during reload is + for aligned loads; all other cases require temporaries. */ + + if (any_memory_operand (operands[1], mode)) + { + if (aligned_memory_operand (operands[1], mode)) + { + if (reload_in_progress) + { + if (mode == QImode) + seq = gen_reload_inqi_aligned (operands[0], operands[1]); + else + seq = gen_reload_inhi_aligned (operands[0], operands[1]); + emit_insn (seq); + } + else + { + rtx aligned_mem, bitnum; + rtx scratch = gen_reg_rtx (SImode); + rtx subtarget; + bool copyout; + + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + if (mode == QImode) + seq = gen_aligned_loadqi (subtarget, aligned_mem, + bitnum, scratch); + else + seq = gen_aligned_loadhi (subtarget, aligned_mem, + bitnum, scratch); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + } + else + { + /* Don't pass these as parameters since that makes the generated + code depend on parameter evaluation order which will cause + bootstrap failures. 
*/ + + rtx temp1, temp2, subtarget, ua; + bool copyout; + + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + ua = get_unaligned_address (operands[1]); + if (mode == QImode) + seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); + else + seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); + + alpha_set_memflags (seq, operands[1]); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + return true; + } + + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx aligned_mem, bitnum; + rtx temp1 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + temp1, temp2)); + } + else + { + rtx temp1 = gen_reg_rtx (DImode); + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx ua = get_unaligned_address (operands[0]); + + if (mode == QImode) + seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3); + else + seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3); + + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + return true; + } + + return false; +} + +/* Implement the movmisalign patterns. One of the operands is a memory + that is not naturally aligned. Emit instructions to load it. */ + +void +alpha_expand_movmisalign (enum machine_mode mode, rtx *operands) +{ + /* Honor misaligned loads, for those we promised to do so. */ + if (MEM_P (operands[1])) + { + rtx tmp; + + if (register_operand (operands[0], mode)) + tmp = operands[0]; + else + tmp = gen_reg_rtx (mode); + + alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + } + else if (MEM_P (operands[0])) + { + if (!reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); + } + else + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. + + For SFmode, this is the only construction I've found that can pass + gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode + intermediates will work, because you'll get intermediate rounding + that ruins the end result. Some of this could be fixed by turning + on round-to-positive-infinity, but that requires diddling the fpsr, + which kills performance. I tried turning this around and converting + to a negative number, so that I could turn on /m, but either I did + it wrong or there's something else cause I wound up with the exact + same single-bit error. There is a branch-less form of this same code: + + srl $16,1,$1 + and $16,1,$2 + cmplt $16,0,$3 + or $1,$2,$2 + cmovge $16,$16,$2 + itoft $3,$f10 + itoft $2,$f11 + cvtqs $f11,$f11 + adds $f11,$f11,$f0 + fcmoveq $f10,$f11,$f0 + + I'm not using it because it's the same number of instructions as + this branch-full form, and it has more serialized long latency + instructions on the critical path. 
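   A host-side model of the branchy SFmode sequence described above, assuming only a
   signed conversion (cvtqs) is available: the high-bit-set case halves the input while
   folding the discarded bit back in as a sticky bit, so the final doubling rounds the
   same way a direct unsigned conversion would.  Sketch only; the helper name is
   illustrative:

     #include <stdint.h>

     static float u64_to_float (uint64_t x)
     {
       if ((int64_t) x >= 0)
         return (float) (int64_t) x;          // plain signed conversion path
       uint64_t half = (x >> 1) | (x & 1);    // srl + and + or: keep a sticky bit
       float f = (float) (int64_t) half;      // cvtqs on the now-positive value
       return f + f;                          // adds: scale back up
     }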
+ + For DFmode, we can avoid rounding errors by breaking up the word + into two pieces, converting them separately, and adding them back: + + LC0: .long 0,0x5f800000 + + itoft $16,$f11 + lda $2,LC0 + cmplt $16,0,$1 + cpyse $f11,$f31,$f10 + cpyse $f31,$f11,$f11 + s4addq $1,$2,$1 + lds $f12,0($1) + cvtqt $f10,$f10 + cvtqt $f11,$f11 + addt $f12,$f10,$f0 + addt $f0,$f11,$f0 + + This doesn't seem to be a clear-cut win over the optabs form. + It probably all depends on the distribution of numbers being + converted -- in the optabs form, all but high-bit-set has a + much lower minimum execution time. */ + +void +alpha_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + enum machine_mode mode; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate the comparison for a conditional branch. */ + +void +alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code, branch_code; + enum machine_mode branch_mode = VOIDmode; + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1], op1 = operands[2]; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, branch_code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), branch_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* For FP, we swap them, for INT, we reverse them. */ + if (cmp_mode == DFmode) + { + cmp_code = swap_condition (code); + branch_code = NE; + tem = op0, op0 = op1, op1 = tem; + } + else + { + cmp_code = reverse_condition (code); + branch_code = EQ; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DFmode) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (DFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (DFmode)) + { + /* Undo the swap we probably did just above. */ + tem = op0, op0 = op1, op1 = tem; + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else + { + /* The following optimizations are only for signed compares. 
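   One of these rewrites, for EQ/NE against a constant that does not fit the 8-bit
   compare immediate but whose negation is an lda/ldah-style constant (constraints I,
   K and L as I read the Alpha port: unsigned 8-bit, signed 16-bit, and 16-bit-shifted
   respectively), relies on nothing more than wraparound arithmetic:

     #include <stdint.h>

     static int eq_via_add (uint64_t x, uint64_t c)
     {
       uint64_t t = x + (0 - c);   // lda/ldah materializes -C, addq folds it in
       return t == 0;              // beq (or bne for the NE form)
     }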
*/ + if (code != LEU && code != LTU && code != GEU && code != GTU) + { + /* Whee. Compare and branch against 0 directly. */ + if (op1 == const0_rtx) + cmp_code = UNKNOWN, branch_code = code; + + /* If the constants doesn't fit into an immediate, but can + be generated by lda/ldah, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + /* ??? Don't do this when comparing against symbols, otherwise + we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will + be declared false out of hand (at least for non-weak). */ + else if (CONST_INT_P (op1) + && (code == EQ || code == NE) + && !(symbolic_operand (op0, VOIDmode) + || (REG_P (op0) && REG_POINTER (op0)))) + { + rtx n_op1 = GEN_INT (-INTVAL (op1)); + + if (! satisfies_constraint_I (op1) + && (satisfies_constraint_K (n_op1) + || satisfies_constraint_L (n_op1))) + cmp_code = PLUS, branch_code = code, op1 = n_op1; + } + } + + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + tem = op0; + if (cmp_code != UNKNOWN) + { + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); + } + + /* Emit the branch instruction. */ + tem = gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (branch_code, + branch_mode, tem, + CONST0_RTX (cmp_mode)), + gen_rtx_LABEL_REF (VOIDmode, + operands[3]), + pc_rtx)); + emit_jump_insn (tem); +} + +/* Certain simplifications can be done to make invalid setcc operations + valid. Return the final comparison, or NULL if we can't work. */ + +bool +alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2], op1 = operands[3]; + rtx tmp; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + if (cmp_mode == DFmode && !TARGET_FIX) + return 0; + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + + cmp_code = UNKNOWN; + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + break; + + case NE: + if (cmp_mode == DImode && op1 == const0_rtx) + break; + /* FALLTHRU */ + + case ORDERED: + cmp_code = reverse_condition (code); + code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + tmp = op0, op0 = op1, op1 = tmp; + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!register_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + if (cmp_code != UNKNOWN) + { + tmp = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; + op1 = const0_rtx; + } + + /* Emit the setcc instruction. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (code, DImode, op0, op1))); + return true; +} + + +/* Rewrite a comparison against zero CMP of the form + (CODE (cc0) (const_int 0)) so it can be written validly in + a conditional move (if_then_else CMP ...). + If both of the operands that set cc0 are nonzero we must emit + an insn to perform the compare (it can't be done within + the conditional move). */ + +rtx +alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (cmp); + enum rtx_code cmov_code = NE; + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + enum machine_mode cmp_mode + = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); + enum machine_mode cmov_mode = VOIDmode; + int local_fast_math = flag_unsafe_math_optimizations; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); + + if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) + { + enum rtx_code cmp_code; + + if (! TARGET_FIX) + return 0; + + /* If we have fp<->int register move instructions, do a cmov by + performing the comparison in fp registers, and move the + zero/nonzero value to integer registers, where we can then + use a normal cmov, or vice-versa. */ + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + cmp_code = code, code = NE; + else + { + cmp_code = swap_condition (code); + code = NE; + tem = op0, op0 = op1, op1 = tem; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + tem = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (cmp_code, cmp_mode, + op0, op1))); + + cmp_mode = cmp_mode == DImode ? DFmode : DImode; + op0 = gen_lowpart (cmp_mode, tem); + op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + local_fast_math = 1; + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* We may be able to use a conditional move directly. + This avoids emitting spurious compares. */ + if (signed_comparison_operator (cmp, VOIDmode) + && (cmp_mode == DImode || local_fast_math) + && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + /* We can't put the comparison inside the conditional move; + emit a compare instruction and put that inside the + conditional move. Make sure we emit only comparisons we have; + swap or reverse as necessary. */ + + if (!can_create_pseudo_p ()) + return NULL_RTX; + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares: */ + break; + + case NE: + case ORDERED: + /* These must be reversed. 
*/ + code = reverse_condition (code); + cmov_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* ??? We mark the branch mode to be CCmode to prevent the compare + and cmov from being combined, since the compare insn follows IEEE + rules that the cmov does not. */ + if (cmp_mode == DFmode && !local_fast_math) + cmov_mode = CCmode; + + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); + return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); +} + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. */ + +int +alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, + rtx t_rtx, rtx f_rtx) +{ + HOST_WIDE_INT t, f, diff; + enum machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) + || (code == GE || code == GT)) + { + code = reverse_condition (code); + diff = t, t = f, f = diff; + diff = t - f; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (can_create_pseudo_p ()) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + /* Below, we must be careful to use copy_rtx on target and subtarget + in intermediate insns, as they may be a subreg rtx, which may not + be shared. */ + + if (f == 0 && exact_log2 (diff) > 0 + /* On EV6, we've got enough shifters to make non-arithmetic shifts + viable over a longer latency cmove. On EV5, the E0 slot is a + scarce resource, and on EV4 shift has the same latency as a cmove. */ + && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), + GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} + +/* Look up the function X_floating library function name for the + given operation. 
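   Host-side arithmetic equivalents of the three shapes alpha_split_conditional_move
   above handles, starting from a 0-or-1 compare result (illustrative helpers, not
   part of this file):

     #include <stdint.h>

     static int64_t sel_pow2 (int cond, int log2_t)         // cond ? (1 << log2_t) : 0
     { return (int64_t) (cond != 0) << log2_t; }

     static int64_t sel_all_ones (int cond)                 // cond ? -1 : 0
     { return -(int64_t) (cond != 0); }

     static int64_t sel_scaled (int cond, int64_t f, int64_t diff)  // diff is 1, 4 or 8
     { return (int64_t) (cond != 0) * diff + f; }           // the s4addq/s8addq shape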
*/ + +struct GTY(()) xfloating_op +{ + const enum rtx_code code; + const char *const GTY((skip)) osf_func; + const char *const GTY((skip)) vms_func; + rtx libcall; +}; + +static GTY(()) struct xfloating_op xfloating_ops[] = +{ + { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, + { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, + { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, + { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, + { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, + { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, + { LT, "_OtsLssX", "OTS$LSS_X", 0 }, + { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, + { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, + { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, + { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, + { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, + { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, + { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } +}; + +static GTY(()) struct xfloating_op vax_cvt_ops[] = +{ + { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } +}; + +static rtx +alpha_lookup_xfloating_lib_func (enum rtx_code code) +{ + struct xfloating_op *ops = xfloating_ops; + long n = ARRAY_SIZE (xfloating_ops); + long i; + + gcc_assert (TARGET_HAS_XFLOATING_LIBS); + + /* How irritating. Nothing to key off for the main table. */ + if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) + { + ops = vax_cvt_ops; + n = ARRAY_SIZE (vax_cvt_ops); + } + + for (i = 0; i < n; ++i, ++ops) + if (ops->code == code) + { + rtx func = ops->libcall; + if (!func) + { + func = init_one_libfunc (TARGET_ABI_OPEN_VMS + ? ops->vms_func : ops->osf_func); + ops->libcall = func; + } + return func; + } + + gcc_unreachable (); +} + +/* Most X_floating operations take the rounding mode as an argument. + Compute that here. */ + +static int +alpha_compute_xfloating_mode_arg (enum rtx_code code, + enum alpha_fp_rounding_mode round) +{ + int mode; + + switch (round) + { + case ALPHA_FPRM_NORM: + mode = 2; + break; + case ALPHA_FPRM_MINF: + mode = 1; + break; + case ALPHA_FPRM_CHOP: + mode = 0; + break; + case ALPHA_FPRM_DYN: + mode = 4; + break; + default: + gcc_unreachable (); + + /* XXX For reference, round to +inf is mode = 3. */ + } + + if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) + mode |= 0x10000; + + return mode; +} + +/* Emit an X_floating library function call. + + Note that these functions do not follow normal calling conventions: + TFmode arguments are passed in two integer registers (as opposed to + indirect); TFmode return values appear in R16+R17. + + FUNC is the function to call. + TARGET is where the output belongs. + OPERANDS are the inputs. + NOPERANDS is the count of inputs. + EQUIV is the expression equivalent for the function. 
+*/ + +static void +alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], + int noperands, rtx equiv) +{ + rtx usage = NULL_RTX, tmp, reg; + int regno = 16, i; + + start_sequence (); + + for (i = 0; i < noperands; ++i) + { + switch (GET_MODE (operands[i])) + { + case TFmode: + reg = gen_rtx_REG (TFmode, regno); + regno += 2; + break; + + case DFmode: + reg = gen_rtx_REG (DFmode, regno + 32); + regno += 1; + break; + + case VOIDmode: + gcc_assert (CONST_INT_P (operands[i])); + /* FALLTHRU */ + case DImode: + reg = gen_rtx_REG (DImode, regno); + regno += 1; + break; + + default: + gcc_unreachable (); + } + + emit_move_insn (reg, operands[i]); + use_reg (&usage, reg); + } + + switch (GET_MODE (target)) + { + case TFmode: + reg = gen_rtx_REG (TFmode, 16); + break; + case DFmode: + reg = gen_rtx_REG (DFmode, 32); + break; + case DImode: + reg = gen_rtx_REG (DImode, 0); + break; + default: + gcc_unreachable (); + } + + tmp = gen_rtx_MEM (QImode, func); + tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx, + const0_rtx, const0_rtx)); + CALL_INSN_FUNCTION_USAGE (tmp) = usage; + RTL_CONST_CALL_P (tmp) = 1; + + tmp = get_insns (); + end_sequence (); + + emit_libcall_block (tmp, target, reg, equiv); +} + +/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ + +void +alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) +{ + rtx func; + int mode; + rtx out_operands[3]; + + func = alpha_lookup_xfloating_lib_func (code); + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + + out_operands[0] = operands[1]; + out_operands[1] = operands[2]; + out_operands[2] = GEN_INT (mode); + alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, + gen_rtx_fmt_ee (code, TFmode, operands[1], + operands[2])); +} + +/* Emit an X_floating library function call for a comparison. */ + +static rtx +alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) +{ + enum rtx_code cmp_code, res_code; + rtx func, out, operands[2], note; + + /* X_floating library comparison functions return + -1 unordered + 0 false + 1 true + Convert the compare against the raw return value. */ + + cmp_code = *pcode; + switch (cmp_code) + { + case UNORDERED: + cmp_code = EQ; + res_code = LT; + break; + case ORDERED: + cmp_code = EQ; + res_code = GE; + break; + case NE: + res_code = NE; + break; + case EQ: + case LT: + case GT: + case LE: + case GE: + res_code = GT; + break; + default: + gcc_unreachable (); + } + *pcode = res_code; + + func = alpha_lookup_xfloating_lib_func (cmp_code); + + operands[0] = op0; + operands[1] = op1; + out = gen_reg_rtx (DImode); + + /* What's actually returned is -1,0,1, not a proper boolean value. */ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); + alpha_emit_xfloating_libcall (func, out, operands, 2, note); + + return out; +} + +/* Emit an X_floating library function call for a conversion. 
*/ + +void +alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) +{ + int noperands = 1, mode; + rtx out_operands[2]; + rtx func; + enum rtx_code code = orig_code; + + if (code == UNSIGNED_FIX) + code = FIX; + + func = alpha_lookup_xfloating_lib_func (code); + + out_operands[0] = operands[1]; + + switch (code) + { + case FIX: + mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + case FLOAT_TRUNCATE: + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + default: + break; + } + + alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, + gen_rtx_fmt_e (orig_code, + GET_MODE (operands[0]), + operands[1])); +} + +/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of + DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, + guarantee that the sequence + set (OP[0] OP[2]) + set (OP[1] OP[3]) + is valid. Naturally, output operand ordering is little-endian. + This is used by *movtf_internal and *movti_internal. */ + +void +alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode, + bool fixup_overlap) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], DImode, 8); + operands[2] = adjust_address (operands[1], DImode, 0); + break; + + case CONST_INT: + case CONST_DOUBLE: + gcc_assert (operands[1] == CONST0_RTX (mode)); + operands[2] = operands[3] = const0_rtx; + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], DImode, 8); + operands[0] = adjust_address (operands[0], DImode, 0); + break; + + default: + gcc_unreachable (); + } + + if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +} + +/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, + op2 is a register containing the sign bit, operation is the + logical operation to be performed. */ + +void +alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) +{ + rtx high_bit = operands[2]; + rtx scratch; + int move; + + alpha_split_tmode_pair (operands, TFmode, false); + + /* Detect three flavors of operand overlap. */ + move = 1; + if (rtx_equal_p (operands[0], operands[2])) + move = 0; + else if (rtx_equal_p (operands[1], operands[2])) + { + if (rtx_equal_p (operands[0], high_bit)) + move = 2; + else + move = -1; + } + + if (move < 0) + emit_move_insn (operands[0], operands[2]); + + /* ??? If the destination overlaps both source tf and high_bit, then + assume source tf is dead in its entirety and use the other half + for a scratch register. Otherwise "scratch" is just the proper + destination register. */ + scratch = operands[move < 2 ? 
1 : 3]; + + emit_insn ((*operation) (scratch, high_bit, operands[3])); + + if (move > 0) + { + emit_move_insn (operands[0], operands[2]); + if (move > 1) + emit_move_insn (operands[1], scratch); + } +} + +/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting + unaligned data: + + unsigned: signed: + word: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+1(r11) ldq_u r2,X+1(r11) + lda r3,X(r11) lda r3,X+2(r11) + extwl r1,r3,r1 extql r1,r3,r1 + extwh r2,r3,r2 extqh r2,r3,r2 + or r1.r2.r1 or r1,r2,r1 + sra r1,48,r1 + + long: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+3(r11) ldq_u r2,X+3(r11) + lda r3,X(r11) lda r3,X(r11) + extll r1,r3,r1 extll r1,r3,r1 + extlh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 addl r1,r2,r1 + + quad: ldq_u r1,X(r11) + ldq_u r2,X+7(r11) + lda r3,X(r11) + extql r1,r3,r1 + extqh r2,r3,r2 + or r1.r2.r1 +*/ + +void +alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, + HOST_WIDE_INT ofs, int sign) +{ + rtx meml, memh, addr, extl, exth, tmp, mema; + enum machine_mode mode; + + if (TARGET_BWX && size == 2) + { + meml = adjust_address (mem, QImode, ofs); + memh = adjust_address (mem, QImode, ofs+1); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendqidi2 (extl, meml)); + emit_insn (gen_zero_extendqidi2 (exth, memh)); + exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + addr = expand_simple_binop (DImode, IOR, extl, exth, + NULL, 1, OPTAB_LIB_WIDEN); + + if (sign && GET_MODE (tgt) != HImode) + { + addr = gen_lowpart (HImode, addr); + emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); + } + else + { + if (GET_MODE (tgt) != DImode) + addr = gen_lowpart (GET_MODE (tgt), addr); + emit_move_insn (tgt, addr); + } + return; + } + + meml = gen_reg_rtx (DImode); + memh = gen_reg_rtx (DImode); + addr = gen_reg_rtx (DImode); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + + mema = XEXP (mem, 0); + if (GET_CODE (mema) == LO_SUM) + mema = force_reg (Pmode, mema); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, mema, ofs), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (meml, tmp); + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, mema, + ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (memh, tmp); + + if (sign && size == 2) + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2)); + + emit_insn (gen_extql (extl, meml, addr)); + emit_insn (gen_extqh (exth, memh, addr)); + + /* We must use tgt here for the target. Alpha-vms port fails if we use + addr for the target, because addr is marked as a pointer and combine + knows that pointers are always sign-extended 32-bit values. 
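   Stepping back from the BWX special case, a byte-level model of what the general
   ldq_u/extql/extqh/or sequence in this function computes for a full unaligned
   quadword load (little-endian; memcpy stands in for ldq_u, and like the real
   sequence it reads whole aligned quadwords around the address; helper name is
   illustrative):

     #include <stdint.h>
     #include <string.h>

     static uint64_t load_u64_unaligned (const unsigned char *p)
     {
       uintptr_t a = (uintptr_t) p;
       uint64_t lo, hi;
       memcpy (&lo, (const void *) (a & ~(uintptr_t) 7), 8);        // ldq_u X(r11)
       memcpy (&hi, (const void *) ((a + 7) & ~(uintptr_t) 7), 8);  // ldq_u X+7(r11)
       unsigned shift = (a & 7) * 8;
       if (shift == 0)
         return lo;               // aligned: both ldq_u hit the same quadword
       return (lo >> shift) | (hi << (64 - shift));   // extql, extqh, or
     }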
*/ + addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), + addr, 1, OPTAB_WIDEN); + } + else + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); + emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh (exth, memh, addr)); + mode = HImode; + break; + case 4: + emit_insn (gen_extlh (exth, memh, addr)); + mode = SImode; + break; + case 8: + emit_insn (gen_extqh (exth, memh, addr)); + mode = DImode; + break; + default: + gcc_unreachable (); + } + + addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), + gen_lowpart (mode, exth), gen_lowpart (mode, tgt), + sign, OPTAB_WIDEN); + } + + if (addr != tgt) + emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); +} + +/* Similarly, use ins and msk instructions to perform unaligned stores. */ + +void +alpha_expand_unaligned_store (rtx dst, rtx src, + HOST_WIDE_INT size, HOST_WIDE_INT ofs) +{ + rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; + + if (TARGET_BWX && size == 2) + { + if (src != const0_rtx) + { + dstl = gen_lowpart (QImode, src); + dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + dsth = gen_lowpart (QImode, dsth); + } + else + dstl = dsth = const0_rtx; + + meml = adjust_address (dst, QImode, ofs); + memh = adjust_address (dst, QImode, ofs+1); + + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + return; + } + + dstl = gen_reg_rtx (DImode); + dsth = gen_reg_rtx (DImode); + insl = gen_reg_rtx (DImode); + insh = gen_reg_rtx (DImode); + + dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + + meml = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dsta, ofs), + GEN_INT (-8))); + set_mem_alias_set (meml, 0); + + memh = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dsta, + ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (memh, 0); + + emit_move_insn (dsth, memh); + emit_move_insn (dstl, meml); + + addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + if (src != CONST0_RTX (GET_MODE (src))) + { + emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), + GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + { + dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); + dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); + } + + /* Must store high before low for degenerate case of aligned. 
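   The store path above is the mirror image: read both covering quadwords, clear the
   destination bytes (mskXl/mskXh), merge in the shifted source (insXl/insXh), and
   write the high quadword before the low one.  A little-endian host model for a
   4-byte store, with memcpy standing in for ldq_u and stq_u:

     #include <stdint.h>
     #include <string.h>

     static void store_u32_unaligned (unsigned char *p, uint32_t v)
     {
       uintptr_t a = (uintptr_t) p;
       unsigned char *lo = (unsigned char *) (a & ~(uintptr_t) 7);
       unsigned char *hi = (unsigned char *) ((a + 3) & ~(uintptr_t) 7);
       unsigned d = (a & 7) * 8;
       uint64_t dl, dh;
       memcpy (&dl, lo, 8);                                  // ldq_u both ends
       memcpy (&dh, hi, 8);
       uint64_t insl = (uint64_t) v << d;                    // insll
       uint64_t insh = d ? (uint64_t) v >> (64 - d) : 0;     // inslh
       uint64_t mskl = 0xffffffffull << d;                   // field in the low quadword
       uint64_t mskh = d ? 0xffffffffull >> (64 - d) : 0;    // spill into the high one
       dh = (dh & ~mskh) | insh;                             // mskXh, then or
       dl = (dl & ~mskl) | insl;                             // mskXl, then or
       memcpy (hi, &dh, 8);                                  // high before low, as noted
       memcpy (lo, &dl, 8);
     }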
*/ + emit_move_insn (memh, dsth); + emit_move_insn (meml, dstl); +} + +/* The block move code tries to maximize speed by separating loads and + stores at the expense of register pressure: we load all of the data + before we store it back out. There are two secondary effects worth + mentioning, that this speeds copying to/from aligned and unaligned + buffers, and that it makes the code significantly easier to write. */ + +#define MAX_MOVE_WORDS 8 + +/* Load an integral number of consecutive unaligned quadwords. */ + +static void +alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; + rtx sreg, areg, tmp, smema; + HOST_WIDE_INT i; + + smema = XEXP (smem, 0); + if (GET_CODE (smema) == LO_SUM) + smema = force_reg (Pmode, smema); + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; ++i) + { + data_regs[i] = out_regs[i]; + ext_tmps[i] = gen_reg_rtx (DImode); + } + data_regs[words] = gen_reg_rtx (DImode); + + if (ofs != 0) + smem = adjust_address (smem, GET_MODE (smem), ofs); + + /* Load up all of the source data. */ + for (i = 0; i < words; ++i) + { + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, smema, 8*i), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[i], tmp); + } + + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, smema, + 8*words - 1), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[words], tmp); + + /* Extract the half-word fragments. Unfortunately DEC decided to make + extxh with offset zero a noop instead of zeroing the register, so + we must take care of that edge condition ourselves with cmov. */ + + sreg = copy_addr_to_reg (smema); + areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, + 1, OPTAB_WIDEN); + for (i = 0; i < words; ++i) + { + emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); + emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg)); + emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i], + gen_rtx_IF_THEN_ELSE (DImode, + gen_rtx_EQ (DImode, areg, + const0_rtx), + const0_rtx, ext_tmps[i]))); + } + + /* Merge the half-words into whole words. */ + for (i = 0; i < words; ++i) + { + out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], + ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); + } +} + +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. */ + +static void +alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + rtx st_tmp_1, st_tmp_2, dreg; + rtx st_addr_1, st_addr_2, dmema; + HOST_WIDE_INT i; + + dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Generate all the tmp registers we need. */ + if (data_regs != NULL) + for (i = 0; i < words; ++i) + ins_tmps[i] = gen_reg_rtx(DImode); + st_tmp_1 = gen_reg_rtx(DImode); + st_tmp_2 = gen_reg_rtx(DImode); + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + st_addr_2 = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dmema, + words*8 - 1), + im8)); + set_mem_alias_set (st_addr_2, 0); + + st_addr_1 = change_address (dmem, DImode, + gen_rtx_AND (DImode, dmema, im8)); + set_mem_alias_set (st_addr_1, 0); + + /* Load up the destination end bits. 
*/ + emit_move_insn (st_tmp_2, st_addr_2); + emit_move_insn (st_tmp_1, st_addr_1); + + /* Shift the input data into place. */ + dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words-1; i >= 0; --i) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words-1; i > 0; --i) + { + ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], + ins_tmps[i-1], ins_tmps[i-1], 1, + OPTAB_WIDEN); + } + } + + /* Split and merge the ends with the destination data. */ + emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); + emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); + + if (data_regs != NULL) + { + st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], + st_tmp_2, 1, OPTAB_WIDEN); + st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], + st_tmp_1, 1, OPTAB_WIDEN); + } + + /* Store it all. */ + emit_move_insn (st_addr_2, st_tmp_2); + for (i = words-1; i > 0; --i) + { + rtx tmp = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, + dmema, i*8), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); + } + emit_move_insn (st_addr_1, st_tmp_1); +} + + +/* Expand string/block move operations. + + operands[0] is the pointer to the destination. + operands[1] is the pointer to the source. + operands[2] is the number of bytes to move. + operands[3] is the alignment. */ + +int +alpha_expand_block_move (rtx operands[]) +{ + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT dst_align = src_align; + rtx orig_src = operands[1]; + rtx orig_dst = operands[0]; + rtx data_regs[2 * MAX_MOVE_WORDS + 16]; + rtx tmp; + unsigned int i, words, ofs, nregs = 0; + + if (orig_bytes <= 0) + return 1; + else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for additional alignment information from recorded register info. 
*/ + + tmp = XEXP (orig_src, 0); + if (REG_P (tmp)) + src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > src_align) + { + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; + } + } + + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > dst_align) + { + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; + } + } + + ofs = 0; + if (src_align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, DImode, ofs + i * 8)); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (src_align >= 32 && bytes >= 4) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (SImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + + nregs += words; + bytes -= words * 4; + ofs += words * 4; + } + + if (bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words+1; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, + words, ofs); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (! TARGET_BWX && bytes >= 4) + { + data_regs[nregs++] = tmp = gen_reg_rtx (SImode); + alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (src_align >= 16) + { + do { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + data_regs[nregs++] = tmp = gen_reg_rtx (QImode); + emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); + bytes -= 1; + ofs += 1; + } + + gcc_assert (nregs <= ARRAY_SIZE (data_regs)); + + /* Now save it back out again. */ + + i = 0, ofs = 0; + + /* Write out the data in whatever chunks reading the source allowed. */ + if (dst_align >= 64) + { + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + emit_move_insn (adjust_address (orig_dst, DImode, ofs), + data_regs[i]); + ofs += 8; + i++; + } + } + + if (dst_align >= 32) + { + /* If the source has remaining DImode regs, write them out in + two pieces. 
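   Concretely, on this little-endian target "two pieces" means the low half of the
   register goes out at OFS and the high half at OFS + 4; as a host sketch:

     #include <stdint.h>
     #include <string.h>

     static void store_di_as_two_si (unsigned char *dst, uint64_t v)
     {
       uint32_t lo = (uint32_t) v;          // gen_lowpart of the DImode reg
       uint32_t hi = (uint32_t) (v >> 32);  // the lshr by 32 in the loop below
       memcpy (dst, &lo, 4);
       memcpy (dst + 4, &hi, 4);
     }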
*/ + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + gen_lowpart (SImode, data_regs[i])); + emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), + gen_lowpart (SImode, tmp)); + ofs += 8; + i++; + } + + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + data_regs[i]); + ofs += 4; + i++; + } + } + + if (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + /* Write out a remaining block of words using unaligned methods. */ + + for (words = 1; i + words < nregs; words++) + if (GET_MODE (data_regs[i + words]) != DImode) + break; + + if (words == 1) + alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + alpha_expand_unaligned_store_words (data_regs + i, orig_dst, + words, ofs); + + i += words; + ofs += words * 8; + } + + /* Due to the above, this won't be aligned. */ + /* ??? If we have more than one of these, consider constructing full + words in registers and using alpha_expand_unaligned_store_words. */ + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + ofs += 4; + i++; + } + + if (dst_align >= 16) + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); + i++; + ofs += 2; + } + else + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + i++; + ofs += 2; + } + + /* The remainder must be byte copies. */ + while (i < nregs) + { + gcc_assert (GET_MODE (data_regs[i]) == QImode); + emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); + i++; + ofs += 1; + } + + return 1; +} + +int +alpha_expand_block_clear (rtx operands[]) +{ + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT alignofs = 0; + rtx orig_dst = operands[0]; + rtx tmp; + int i, words, ofs = 0; + + if (orig_bytes <= 0) + return 1; + if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for stricter alignment. */ + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > align) + { + if (a >= 64) + align = a, alignofs = 8 - c % 8; + else if (a >= 32) + align = a, alignofs = 4 - c % 4; + else if (a >= 16) + align = a, alignofs = 2 - c % 2; + } + } + + /* Handle an unaligned prefix first. */ + + if (alignofs > 0) + { +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Given that alignofs is bounded by align, the only time BWX could + generate three stores is for a 7 byte fill. Prefer two individual + stores over a load/mask/store sequence. */ + if ((!TARGET_BWX || alignofs == 7) + && align >= 32 + && !(alignofs == 4 && bytes >= 4)) + { + enum machine_mode mode = (align >= 64 ? DImode : SImode); + int inv_alignofs = (align >= 64 ? 
8 : 4) - alignofs; + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); + set_mem_alias_set (mem, 0); + + mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8)); + if (bytes < alignofs) + { + mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8); + ofs += bytes; + bytes = 0; + } + else + { + bytes -= alignofs; + ofs += alignofs; + } + alignofs = 0; + + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + } +#endif + + if (TARGET_BWX && (alignofs & 1) && bytes >= 1) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + alignofs -= 1; + } + if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); + bytes -= 2; + ofs += 2; + alignofs -= 2; + } + if (alignofs == 4 && bytes >= 4) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + alignofs = 0; + } + + /* If we've not used the extra lead alignment information by now, + we won't be able to. Downgrade align to match what's left over. */ + if (alignofs > 0) + { + alignofs = alignofs & -alignofs; + align = MIN (align, alignofs * BITS_PER_UNIT); + } + } + + /* Handle a block of contiguous long-words. */ + + if (align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), + const0_rtx); + + bytes -= words * 8; + ofs += words * 8; + } + + /* If the block is large and appropriately aligned, emit a single + store followed by a sequence of stq_u insns. */ + + if (align >= 32 && bytes > 16) + { + rtx orig_dsta; + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + + orig_dsta = XEXP (orig_dst, 0); + if (GET_CODE (orig_dsta) == LO_SUM) + orig_dsta = force_reg (Pmode, orig_dsta); + + words = bytes / 8; + for (i = 0; i < words; ++i) + { + rtx mem + = change_address (orig_dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, orig_dsta, + ofs + i*8), + GEN_INT (-8))); + set_mem_alias_set (mem, 0); + emit_move_insn (mem, const0_rtx); + } + + /* Depending on the alignment, the first stq_u may have overlapped + with the initial stl, which means that the last stq_u didn't + write as much as it would appear. Leave those questionable bytes + unaccounted for. */ + bytes -= words * 8 - 4; + ofs += words * 8 - 4; + } + + /* Handle a smaller block of aligned words. */ + + if ((align >= 64 && bytes == 4) + || (align == 32 && bytes >= 4)) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), + const0_rtx); + + bytes -= words * 4; + ofs += words * 4; + } + + /* An unaligned block uses stq_u stores for as many as possible. */ + + if (bytes >= 8) + { + words = bytes / 8; + + alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + + bytes -= words * 8; + ofs += words * 8; + } + + /* Next clean up any trailing pieces. */ + +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Count the number of bits in BYTES for which aligned stores could + be emitted. */ + words = 0; + for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1) + if (bytes & i) + words += 1; + + /* If we have appropriate alignment (and it wouldn't take too many + instructions otherwise), mask out the bytes we need. */ + if (TARGET_BWX ? 
words > 2 : bytes > 0) + { + if (align >= 64) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, DImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + else if (align >= 32 && bytes < 4) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, SImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + } +#endif + + if (!TARGET_BWX && bytes >= 4) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (align >= 16) + { + do { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), + const0_rtx); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + + return 1; +} + +/* Returns a mask so that zap(x, value) == x & mask. */ + +rtx +alpha_expand_zap_mask (HOST_WIDE_INT value) +{ + rtx result; + int i; + + if (HOST_BITS_PER_WIDE_INT >= 64) + { + HOST_WIDE_INT mask = 0; + + for (i = 7; i >= 0; --i) + { + mask <<= 8; + if (!((value >> i) & 1)) + mask |= 0xff; + } + + result = gen_int_mode (mask, DImode); + } + else + { + HOST_WIDE_INT mask_lo = 0, mask_hi = 0; + + gcc_assert (HOST_BITS_PER_WIDE_INT == 32); + + for (i = 7; i >= 4; --i) + { + mask_hi <<= 8; + if (!((value >> i) & 1)) + mask_hi |= 0xff; + } + + for (i = 3; i >= 0; --i) + { + mask_lo <<= 8; + if (!((value >> i) & 1)) + mask_lo |= 0xff; + } + + result = immed_double_const (mask_lo, mask_hi, DImode); + } + + return result; +} + +void +alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), + enum machine_mode mode, + rtx op0, rtx op1, rtx op2) +{ + op0 = gen_lowpart (mode, op0); + + if (op1 == const0_rtx) + op1 = CONST0_RTX (mode); + else + op1 = gen_lowpart (mode, op1); + + if (op2 == const0_rtx) + op2 = CONST0_RTX (mode); + else + op2 = gen_lowpart (mode, op2); + + emit_insn ((*gen) (op0, op1, op2)); +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + rtx x; + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + add_int_reg_note (x, REG_BR_PROB, very_unlikely); +} + +/* A subroutine of the atomic operation splitters. Emit a load-locked + instruction in MODE. */ + +static void +emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) +{ + rtx (*fn) (rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_load_locked_si; + else if (mode == DImode) + fn = gen_load_locked_di; + emit_insn (fn (reg, mem)); +} + +/* A subroutine of the atomic operation splitters. Emit a store-conditional + instruction in MODE. 
*/ + +static void +emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) +{ + rtx (*fn) (rtx, rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_store_conditional_si; + else if (mode == DImode) + fn = gen_store_conditional_di; + emit_insn (fn (res, mem, val)); +} + +/* Subroutines of the atomic operation splitters. Emit barriers + as needed for the memory MODEL. */ + +static void +alpha_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +alpha_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* A subroutine of the atomic operation splitters. Emit an insxl + instruction in MODE. */ + +static rtx +emit_insxl (enum machine_mode mode, rtx op1, rtx op2) +{ + rtx ret = gen_reg_rtx (DImode); + rtx (*fn) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: + fn = gen_insbl; + break; + case HImode: + fn = gen_inswl; + break; + case SImode: + fn = gen_insll; + break; + case DImode: + fn = gen_insql; + break; + default: + gcc_unreachable (); + } + + op1 = force_reg (mode, op1); + emit_insn (fn (ret, op1, op2)); + + return ret; +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. SCRATCH is + a scratch register. */ + +void +alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, + rtx after, rtx scratch, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); + + alpha_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (DImode, label); + + if (before == NULL) + before = scratch; + emit_load_locked (mode, before, mem); + + if (code == NOT) + { + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + + x = gen_rtx_NOT (mode, val); + } + else + x = gen_rtx_fmt_ee (code, mode, before, val); + if (after) + emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x))); + emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); + + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +/* Expand a compare and swap operation. 
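   Both alpha_split_atomic_op above and the compare-and-swap splitters below emit the
   same load-locked / store-conditional retry shape; modelled in C with hypothetical
   load_locked and store_conditional helpers standing in for the ldq_l/stq_c patterns:

     #include <stdint.h>

     extern uint64_t load_locked (uint64_t *mem);                  // hypothetical helper
     extern int store_conditional (uint64_t *mem, uint64_t val);   // hypothetical helper

     static uint64_t atomic_fetch_add_u64 (uint64_t *mem, uint64_t val)
     {
       uint64_t before, after;
       do
         {
           before = load_locked (mem);           // the BEFORE output
           after = before + val;                 // the operation (PLUS here)
         }
       while (!store_conditional (mem, after));  // retry on the unlikely failure
       return before;                            // AFTER is also available if wanted
     }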
*/ + +void +alpha_split_compare_and_swap (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + bool is_weak; + enum memmodel mod_s, mod_f; + enum machine_mode mode; + rtx label1, label2, x; + + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (operands[5] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[6]); + mod_f = (enum memmodel) INTVAL (operands[7]); + mode = GET_MODE (mem); + + alpha_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_load_locked (mode, retval, mem); + + x = gen_lowpart (DImode, retval); + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, x, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + emit_move_insn (cond, newval); + emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond)); + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); + + alpha_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); +} + +void +alpha_expand_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; + enum machine_mode mode; + rtx addr, align, wdst; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + cond = operands[0]; + dst = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. 
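   The QImode/HImode forms cannot be loaded locked directly, so the expander and
   splitter below operate on the containing aligned quadword: extract the field,
   compare it, and merge the new value back in before the conditional store.  A
   host-side model for a byte, using a hypothetical cas_u64 helper in place of the
   ldq_l/stq_c loop:

     #include <stdint.h>
     #include <string.h>

     extern int cas_u64 (uint64_t *mem, uint64_t expected, uint64_t desired);  // hypothetical

     static int cas_u8 (uint8_t *p, uint8_t oldval, uint8_t newval)
     {
       uintptr_t a = (uintptr_t) p;
       uint64_t *word = (uint64_t *) (a & ~(uintptr_t) 7);   // the 'align' operand
       unsigned shift = (a & 7) * 8;
       uint64_t old_word;
       memcpy (&old_word, word, 8);
       if (((old_word >> shift) & 0xff) != oldval)           // extbl, then the compare
         return 0;                                           // jump to the fail label
       uint64_t new_word = (old_word & ~(0xffull << shift))  // mskbl
                           | ((uint64_t) newval << shift);   // insbl, then or
       return cas_u64 (word, old_word, new_word);            // the stq_c-style commit
     }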
*/ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + oldval = convert_modes (DImode, mode, oldval, 1); + + if (newval != const0_rtx) + newval = emit_insxl (mode, newval, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + gen = gen_atomic_compare_and_swapqi_1; + else + gen = gen_atomic_compare_and_swaphi_1; + emit_insn (gen (cond, wdst, mem, oldval, newval, align, + is_weak, mod_s, mod_f)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + enum machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[7]); + mod_f = (enum memmodel) INTVAL (operands[8]); + scratch = operands[9]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + alpha_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + if (newval != const0_rtx) + emit_insn (gen_iordi3 (cond, cond, newval)); + + emit_store_conditional (DImode, cond, mem, cond); + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); + + alpha_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); +} + +/* Expand an atomic exchange operation. 
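+
+   (Editorial illustration, not part of the original GCC source: for
+   SImode the splitter below boils down to the retry loop
+
+	1:	ldl_l	retval,0(addr)
+		mov	val,scratch
+		stl_c	scratch,0(addr)
+		beq	scratch,1b
+
+   again with barriers according to MODEL; register names are
+   illustrative.)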
*/ + +void +alpha_split_atomic_exchange (rtx operands[]) +{ + rtx retval, mem, val, scratch; + enum memmodel model; + enum machine_mode mode; + rtx label, x, cond; + + retval = operands[0]; + mem = operands[1]; + val = operands[2]; + model = (enum memmodel) INTVAL (operands[3]); + scratch = operands[4]; + mode = GET_MODE (mem); + cond = gen_lowpart (DImode, scratch); + + alpha_pre_atomic_barrier (model); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (mode, retval, mem); + emit_move_insn (scratch, val); + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +void +alpha_expand_atomic_exchange_12 (rtx operands[]) +{ + rtx dst, mem, val, model; + enum machine_mode mode; + rtx addr, align, wdst; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx); + + dst = operands[0]; + mem = operands[1]; + val = operands[2]; + model = operands[3]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. */ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + /* Insert val into the correct byte location within the word. */ + if (val != const0_rtx) + val = emit_insxl (mode, val, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + gen = gen_atomic_exchangeqi_1; + else + gen = gen_atomic_exchangehi_1; + emit_insn (gen (wdst, mem, val, align, model)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_atomic_exchange_12 (rtx operands[]) +{ + rtx dest, orig_mem, addr, val, align, scratch; + rtx label, mem, width, mask, x; + enum machine_mode mode; + enum memmodel model; + + dest = operands[0]; + orig_mem = operands[1]; + val = operands[2]; + align = operands[3]; + model = (enum memmodel) INTVAL (operands[4]); + scratch = operands[5]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + alpha_pre_atomic_barrier (model); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (scratch, scratch, mask, addr)); + if (val != const0_rtx) + emit_insn (gen_iordi3 (scratch, scratch, val)); + + emit_store_conditional (DImode, scratch, mem, scratch); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + + /* If the dependence is an anti-dependence, there is no cost. For an + output dependence, there is sometimes a cost, but it doesn't seem + worth handling those few cases. */ + if (REG_NOTE_KIND (link) != 0) + return cost; + + /* If we can't recognize the insns, we can't really do anything. 
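+
+   (Editorial note, not part of the original GCC source: recog_memoized
+   returns a negative insn code when a pattern is not recognized, so the
+   check below simply leaves COST untouched in that case; the only
+   adjustment made afterwards is adding alpha_memory_latency - 1 for
+   dependencies on load-type producers.)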
*/ + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + /* Bring in the user-defined memory latency. */ + if (dep_insn_type == TYPE_ILD + || dep_insn_type == TYPE_FLD + || dep_insn_type == TYPE_LDSYM) + cost += alpha_memory_latency-1; + + /* Everything else handled in DFA bypasses now. */ + + return cost; +} + +/* The number of instructions that can be issued per cycle. */ + +static int +alpha_issue_rate (void) +{ + return (alpha_tune == PROCESSOR_EV4 ? 2 : 4); +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. + + For EV4, loads can be issued to either IB0 or IB1, thus we have 2 + alternative schedules. For EV5, we can choose between E0/E1 and + FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */ + +static int +alpha_multipass_dfa_lookahead (void) +{ + return (alpha_tune == PROCESSOR_EV6 ? 4 : 2); +} + +/* Machine-specific function data. */ + +struct GTY(()) alpha_links; + +struct GTY(()) machine_function +{ + /* For OSF. */ + const char *some_ld_name; + + /* For flag_reorder_blocks_and_partition. */ + rtx gp_save_rtx; + + /* For VMS condition handlers. */ + bool uses_condition_handler; + + /* Linkage entries. */ + splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *))) + links; +}; + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +alpha_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Support for frame based VMS condition handlers. */ + +/* A VMS condition handler may be established for a function with a call to + __builtin_establish_vms_condition_handler, and cancelled with a call to + __builtin_revert_vms_condition_handler. + + The VMS Condition Handling Facility knows about the existence of a handler + from the procedure descriptor .handler field. As the VMS native compilers, + we store the user specified handler's address at a fixed location in the + stack frame and point the procedure descriptor at a common wrapper which + fetches the real handler's address and issues an indirect call. + + The indirection wrapper is "__gcc_shell_handler", provided by libgcc. + + We force the procedure kind to PT_STACK, and the fixed frame location is + fp+8, just before the register save area. We use the handler_data field in + the procedure descriptor to state the fp offset at which the installed + handler address can be found. */ + +#define VMS_COND_HANDLER_FP_OFFSET 8 + +/* Expand code to store the currently installed user VMS condition handler + into TARGET and install HANDLER as the new condition handler. */ + +void +alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler) +{ + rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx, + VMS_COND_HANDLER_FP_OFFSET); + + rtx handler_slot + = gen_rtx_MEM (DImode, handler_slot_address); + + emit_move_insn (target, handler_slot); + emit_move_insn (handler_slot, handler); + + /* Notify the start/prologue/epilogue emitters that the condition handler + slot is needed. In addition to reserving the slot space, this will force + the procedure kind to PT_STACK so ensure that the hard_frame_pointer_rtx + use above is correct. */ + cfun->machine->uses_condition_handler = true; +} + +/* Expand code to store the current VMS condition handler into TARGET and + nullify it. 
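+
+   (Editorial illustration, not part of the original GCC source: these
+   two expanders back the VMS builtins registered in alpha_init_builtins
+   further below, used at the C level roughly as
+
+	void *prev = __builtin_establish_vms_condition_handler (my_handler);
+	...
+	void *old = __builtin_revert_vms_condition_handler ();
+
+   where my_handler is a hypothetical user-supplied handler routine.)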
*/ + +void +alpha_expand_builtin_revert_vms_condition_handler (rtx target) +{ + /* We implement this by establishing a null condition handler, with the tiny + side effect of setting uses_condition_handler. This is a little bit + pessimistic if no actual builtin_establish call is ever issued, which is + not a real problem and expected never to happen anyway. */ + + alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx); +} + +/* Functions to save and restore alpha_return_addr_rtx. */ + +/* Start the ball rolling with RETURN_ADDR_RTX. */ + +rtx +alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RA); +} + +/* Return or create a memory slot containing the gp value for the current + function. Needed only if TARGET_LD_BUGGY_LDGP. */ + +rtx +alpha_gp_save_rtx (void) +{ + rtx seq, m = cfun->machine->gp_save_rtx; + + if (m == NULL) + { + start_sequence (); + + m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD); + m = validize_mem (m); + emit_move_insn (m, pic_offset_table_rtx); + + seq = get_insns (); + end_sequence (); + + /* We used to simply emit the sequence after entry_of_function. + However this breaks the CFG if the first instruction in the + first block is not the NOTE_INSN_BASIC_BLOCK, for example a + label. Emit the sequence properly on the edge. We are only + invoked from dw2_build_landing_pads and finish_eh_generation + will call commit_edge_insertions thanks to a kludge. */ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + + cfun->machine->gp_save_rtx = m; + } + + return m; +} + +static void +alpha_instantiate_decls (void) +{ + if (cfun->machine->gp_save_rtx != NULL_RTX) + instantiate_decl_rtl (cfun->machine->gp_save_rtx); +} + +static int +alpha_ra_ever_killed (void) +{ + rtx top; + + if (!has_hard_reg_initial_val (Pmode, REG_RA)) + return (int)df_regs_ever_live_p (REG_RA); + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + + return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX); +} + + +/* Return the trap mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_trap_mode_suffix (void) +{ + enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); + + switch (s) + { + case TRAP_SUFFIX_NONE: + return NULL; + + case TRAP_SUFFIX_SU: + if (alpha_fptm >= ALPHA_FPTM_SU) + return "su"; + return NULL; + + case TRAP_SUFFIX_SUI: + if (alpha_fptm >= ALPHA_FPTM_SUI) + return "sui"; + return NULL; + + case TRAP_SUFFIX_V_SV: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + case ALPHA_FPTM_SUI: + return "sv"; + default: + gcc_unreachable (); + } + + case TRAP_SUFFIX_V_SV_SVI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + return "sv"; + case ALPHA_FPTM_SUI: + return "svi"; + default: + gcc_unreachable (); + } + break; + + case TRAP_SUFFIX_U_SU_SUI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "u"; + case ALPHA_FPTM_SU: + return "su"; + case ALPHA_FPTM_SUI: + return "sui"; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Return the rounding mode suffix applicable to the current + instruction, or NULL. 
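+
+   (Editorial note, not part of the original GCC source: together with
+   get_trap_mode_suffix above, this feeds the '/' operand code in
+   print_operand below, so that, for example, compiling with
+   -mfp-trap-mode=su -mfp-rounding-mode=d makes an addt come out as
+   addt/sud.)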
*/ + +static const char * +get_round_mode_suffix (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (alpha_fprm) + { + case ALPHA_FPRM_NORM: + return NULL; + case ALPHA_FPRM_MINF: + return "m"; + case ALPHA_FPRM_CHOP: + return "c"; + case ALPHA_FPRM_DYN: + return "d"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "c"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in some movdi_er_tlsldm pattern. */ + +static int +get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + +static const char * +get_some_local_dynamic_name (void) +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + gcc_unreachable (); +} + +/* Print an operand. Recognize special options, documented below. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + int i; + + switch (code) + { + case '~': + /* Print the assembler name of the current function. */ + assemble_name (file, alpha_fnname); + break; + + case '&': + assemble_name (file, get_some_local_dynamic_name ()); + break; + + case '/': + { + const char *trap = get_trap_mode_suffix (); + const char *round = get_round_mode_suffix (); + + if (trap || round) + fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : "")); + break; + } + + case ',': + /* Generates single precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); + break; + + case '-': + /* Generates double precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file); + break; + + case '#': + if (alpha_this_literal_sequence_number == 0) + alpha_this_literal_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_literal_sequence_number); + break; + + case '*': + if (alpha_this_gpdisp_sequence_number == 0) + alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_gpdisp_sequence_number); + break; + + case 'H': + if (GET_CODE (x) == HIGH) + output_addr_const (file, XEXP (x, 0)); + else + output_operand_lossage ("invalid %%H value"); + break; + + case 'J': + { + const char *lituse; + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsgd"; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsldm"; + } + else if (CONST_INT_P (x)) + lituse = "lituse_jsr"; + else + { + output_operand_lossage ("invalid %%J value"); + break; + } + + if (x != const0_rtx) + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + + case 'j': + { + const char *lituse; + +#ifdef HAVE_AS_JSRDIRECT_RELOCS + lituse = "lituse_jsrdirect"; +#else + lituse = "lituse_jsr"; +#endif + + gcc_assert (INTVAL (x) != 0); + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + case 'r': + /* If this operand is the constant zero, write it as "$31". 
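+
+   (Editorial note, not part of the original GCC source: $31 is the
+   architectural zero register in the integer file and $f31 its
+   floating-point counterpart, which is why %r and %R fold a constant
+   zero operand to those names.)  */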
*/ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$31"); + else + output_operand_lossage ("invalid %%r value"); + break; + + case 'R': + /* Similar, but for floating-point. */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$f31"); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'N': + /* Write the 1's complement of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%N value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); + break; + + case 'P': + /* Write 1 << C, for a constant C. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%P value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x)); + break; + + case 'h': + /* Write the high-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%h value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); + break; + + case 'L': + /* Write the low-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%L value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); + break; + + case 'm': + /* Write mask for ZAP insn. */ + if (GET_CODE (x) == CONST_DOUBLE) + { + HOST_WIDE_INT mask = 0; + HOST_WIDE_INT value; + + value = CONST_DOUBLE_LOW (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + value = CONST_DOUBLE_HIGH (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << (i + sizeof (int))); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff); + } + + else if (CONST_INT_P (x)) + { + HOST_WIDE_INT mask = 0, value = INTVAL (x); + + for (i = 0; i < 8; i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); + } + else + output_operand_lossage ("invalid %%m value"); + break; + + case 'M': + /* 'b', 'w', 'l', or 'q' as the value of the constant. */ + if (!CONST_INT_P (x) + || (INTVAL (x) != 8 && INTVAL (x) != 16 + && INTVAL (x) != 32 && INTVAL (x) != 64)) + output_operand_lossage ("invalid %%M value"); + + fprintf (file, "%s", + (INTVAL (x) == 8 ? "b" + : INTVAL (x) == 16 ? "w" + : INTVAL (x) == 32 ? "l" + : "q")); + break; + + case 'U': + /* Similar, except do it from the mask. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + if (value == 0xff) + { + fputc ('b', file); + break; + } + if (value == 0xffff) + { + fputc ('w', file); + break; + } + if (value == 0xffffffff) + { + fputc ('l', file); + break; + } + if (value == -1) + { + fputc ('q', file); + break; + } + } + else if (HOST_BITS_PER_WIDE_INT == 32 + && GET_CODE (x) == CONST_DOUBLE + && CONST_DOUBLE_LOW (x) == 0xffffffff + && CONST_DOUBLE_HIGH (x) == 0) + { + fputc ('l', file); + break; + } + output_operand_lossage ("invalid %%U value"); + break; + + case 's': + /* Write the constant value divided by 8. 
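+
+   (Editorial note, not part of the original GCC source: e.g. a
+   CONST_INT of 32 is printed as 4; the checks below only accept a
+   non-negative multiple of 8 below 64, i.e. a bit count expressed as a
+   whole number of bytes.)  */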
*/ + if (!CONST_INT_P (x) + || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + || (INTVAL (x) & 7) != 0) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + break; + + case 'S': + /* Same, except compute (64 - c) / 8 */ + + if (!CONST_INT_P (x) + && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + && (INTVAL (x) & 7) != 8) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8); + break; + + case 'C': case 'D': case 'c': case 'd': + /* Write out comparison name. */ + { + enum rtx_code c = GET_CODE (x); + + if (!COMPARISON_P (x)) + output_operand_lossage ("invalid %%C value"); + + else if (code == 'D') + c = reverse_condition (c); + else if (code == 'c') + c = swap_condition (c); + else if (code == 'd') + c = swap_condition (reverse_condition (c)); + + if (c == LEU) + fprintf (file, "ule"); + else if (c == LTU) + fprintf (file, "ult"); + else if (c == UNORDERED) + fprintf (file, "un"); + else + fprintf (file, "%s", GET_RTX_NAME (c)); + } + break; + + case 'E': + /* Write the divide or modulus operator. */ + switch (GET_CODE (x)) + { + case DIV: + fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UDIV: + fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q"); + break; + case MOD: + fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UMOD: + fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); + break; + default: + output_operand_lossage ("invalid %%E value"); + break; + } + break; + + case 'A': + /* Write "_u" for unaligned access. */ + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + fprintf (file, "_u"); + break; + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + output_address (XEXP (x, 0)); + else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) + { + switch (XINT (XEXP (x, 0), 1)) + { + case UNSPEC_DTPREL: + case UNSPEC_TPREL: + output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + break; + } + } + else + output_addr_const (file, x); + break; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +void +print_operand_address (FILE *file, rtx addr) +{ + int basereg = 31; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (addr) == AND) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1))) + { + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == LO_SUM) + { + const char *reloc16, *reloclo; + rtx op1 = XEXP (addr, 1); + + if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) + { + op1 = XEXP (op1, 0); + switch (XINT (op1, 1)) + { + case UNSPEC_DTPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); + break; + case UNSPEC_TPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? 
"tprel" : "tprello"); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + return; + } + + output_addr_const (file, XVECEXP (op1, 0, 0)); + } + else + { + reloc16 = "gprel"; + reloclo = "gprellow"; + output_addr_const (file, op1); + } + + if (offset) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + + addr = XEXP (addr, 0); + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + default: + gcc_unreachable (); + } + + fprintf (file, "($%d)\t\t!%s", basereg, + (basereg == 29 ? reloc16 : reloclo)); + return; + } + + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + case CONST_INT: + offset = INTVAL (addr); + break; + +#if TARGET_ABI_OPEN_VMS + case SYMBOL_REF: + fprintf (file, "%s", XSTR (addr, 0)); + return; + + case CONST: + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); + fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, + XSTR (XEXP (XEXP (addr, 0), 0), 0), + INTVAL (XEXP (XEXP (addr, 0), 1))); + return; + +#endif + default: + gcc_unreachable (); + } + + fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx + for the static chain value for the function. */ + +static void +alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, word1, word2; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + +#ifdef POINTERS_EXTEND_UNSIGNED + fnaddr = convert_memory_address (Pmode, fnaddr); + chain_value = convert_memory_address (Pmode, chain_value); +#endif + + if (TARGET_ABI_OPEN_VMS) + { + const char *fnname; + char *trname; + + /* Construct the name of the trampoline entry point. */ + fnname = XSTR (fnaddr, 0); + trname = (char *) alloca (strlen (fnname) + 5); + strcpy (trname, fnname); + strcat (trname, "..tr"); + fnname = ggc_alloc_string (trname, strlen (trname) + 1); + word2 = gen_rtx_SYMBOL_REF (Pmode, fnname); + + /* Trampoline (or "bounded") procedure descriptor is constructed from + the function's procedure descriptor with certain fields zeroed IAW + the VMS calling standard. This is stored in the first quadword. */ + word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr)); + word1 = expand_and (DImode, word1, + GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)), + NULL); + } + else + { + /* These 4 instructions are: + ldq $1,24($27) + ldq $27,16($27) + jmp $31,($27),0 + nop + We don't bother setting the HINT field of the jump; the nop + is merely there for padding. */ + word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018)); + word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000)); + } + + /* Store the first two words, as computed above. */ + mem = adjust_address (m_tramp, DImode, 0); + emit_move_insn (mem, word1); + mem = adjust_address (m_tramp, DImode, 8); + emit_move_insn (mem, word2); + + /* Store function address and static chain value. 
*/ + mem = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (mem, fnaddr); + mem = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (mem, chain_value); + + if (TARGET_ABI_OSF) + { + emit_insn (gen_imb ()); +#ifdef HAVE_ENABLE_EXECUTE_STACK + emit_library_call (init_one_libfunc ("__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif + } +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On Alpha the first 6 words of args are normally in registers + and the rest are pushed. */ + +static rtx +alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int basereg; + int num_args; + + /* Don't get confused and pass small structures in FP registers. */ + if (type && AGGREGATE_TYPE_P (type)) + basereg = 16; + else + { +#ifdef ENABLE_CHECKING + /* With alpha_split_complex_arg, we shouldn't see any raw complex + values here. */ + gcc_assert (!COMPLEX_MODE_P (mode)); +#endif + + /* Set up defaults for FP operands passed in FP registers, and + integral operands passed in integer registers. */ + if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT) + basereg = 32 + 16; + else + basereg = 16; + } + + /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for + the two platforms, so we can't avoid conditional compilation. */ +#if TARGET_ABI_OPEN_VMS + { + if (mode == VOIDmode) + return alpha_arg_info_reg_val (*cum); + + num_args = cum->num_args; + if (num_args >= 6 + || targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#elif TARGET_ABI_OSF + { + if (*cum >= 6) + return NULL_RTX; + num_args = *cum; + + /* VOID is passed as a special flag for "last argument". */ + if (type == void_type_node) + basereg = 16; + else if (targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#else +#error Unhandled ABI +#endif + + return gen_rtx_REG (mode, num_args + basereg); +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool onstack = targetm.calls.must_pass_in_stack (mode, type); + int increment = onstack ? 
6 : ALPHA_ARG_SIZE (mode, type, named); + +#if TARGET_ABI_OSF + *cum += increment; +#else + if (!onstack && cum->num_args < 6) + cum->atypes[cum->num_args] = alpha_arg_type (mode); + cum->num_args += increment; +#endif +} + +static int +alpha_arg_partial_bytes (cumulative_args_t cum_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int words = 0; + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); + +#if TARGET_ABI_OPEN_VMS + if (cum->num_args < 6 + && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - cum->num_args; +#elif TARGET_ABI_OSF + if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - *cum; +#else +#error Unhandled ABI +#endif + + return words * UNITS_PER_WORD; +} + + +/* Return true if TYPE must be returned in memory, instead of in registers. */ + +static bool +alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = VOIDmode; + int size; + + if (type) + { + mode = TYPE_MODE (type); + + /* All aggregates are returned in memory, except on OpenVMS where + records that fit 64 bits should be returned by immediate value + as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ + if (TARGET_ABI_OPEN_VMS + && TREE_CODE (type) != ARRAY_TYPE + && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8) + return false; + + if (AGGREGATE_TYPE_P (type)) + return true; + } + + size = GET_MODE_SIZE (mode); + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* Pass all float vectors in memory, like an aggregate. */ + return true; + + case MODE_COMPLEX_FLOAT: + /* We judge complex floats on the size of their element, + not the size of the whole type. */ + size = GET_MODE_UNIT_SIZE (mode); + break; + + case MODE_INT: + case MODE_FLOAT: + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + break; + + default: + /* ??? We get called on all sorts of random stuff from + aggregate_value_p. We must return something, but it's not + clear what's safe to return. Pretend it's a struct I + guess. */ + return true; + } + + /* Otherwise types must fit in one register. */ + return size > UNITS_PER_WORD; +} + +/* Return true if TYPE should be passed by invisible reference. */ + +static bool +alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + return mode == TFmode || mode == TCmode; +} + +/* Define how to find the value returned by a function. VALTYPE is the + data type of the value (as a tree). If the precise function being + called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. + MODE is set instead of VALTYPE for libcalls. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +rtx +function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + unsigned int regnum, dummy ATTRIBUTE_UNUSED; + enum mode_class mclass; + + gcc_assert (!valtype || !alpha_return_in_memory (valtype, func)); + + if (valtype) + mode = TYPE_MODE (valtype); + + mclass = GET_MODE_CLASS (mode); + switch (mclass) + { + case MODE_INT: + /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, + where we have them returning both SImode and DImode. 
*/ + if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype))) + PROMOTE_MODE (mode, dummy, valtype); + /* FALLTHRU */ + + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + regnum = 0; + break; + + case MODE_FLOAT: + regnum = 32; + break; + + case MODE_COMPLEX_FLOAT: + { + enum machine_mode cmode = GET_MODE_INNER (mode); + + return gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), + GEN_INT (GET_MODE_SIZE (cmode))))); + } + + case MODE_RANDOM: + /* We should only reach here for BLKmode on VMS. */ + gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode); + regnum = 0; + break; + + default: + gcc_unreachable (); + } + + return gen_rtx_REG (mode, regnum); +} + +/* TCmode complex values are passed by invisible reference. We + should not split these values. */ + +static bool +alpha_split_complex_arg (const_tree type) +{ + return TYPE_MODE (type) != TCmode; +} + +static tree +alpha_build_builtin_va_list (void) +{ + tree base, ofs, space, record, type_decl; + + if (TARGET_ABI_OPEN_VMS) + return ptr_type_node; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + + /* C++? SET_IS_AGGR_TYPE (record, 1); */ + + /* Dummy field to prevent alignment warnings. */ + space = build_decl (BUILTINS_LOCATION, + FIELD_DECL, NULL_TREE, integer_type_node); + DECL_FIELD_CONTEXT (space) = record; + DECL_ARTIFICIAL (space) = 1; + DECL_IGNORED_P (space) = 1; + + ofs = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__offset"), + integer_type_node); + DECL_FIELD_CONTEXT (ofs) = record; + DECL_CHAIN (ofs) = space; + /* ??? This is a hack, __offset is marked volatile to prevent + DCE that confuses stdarg optimization and results in + gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */ + TREE_THIS_VOLATILE (ofs) = 1; + + base = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__base"), + ptr_type_node); + DECL_FIELD_CONTEXT (base) = record; + DECL_CHAIN (base) = ofs; + + TYPE_FIELDS (record) = base; + layout_type (record); + + va_list_gpr_counter_field = ofs; + return record; +} + +#if TARGET_ABI_OSF +/* Helper function for alpha_stdarg_optimize_hook. Skip over casts + and constant additions. */ + +static gimple +va_list_skip_additions (tree lhs) +{ + gimple stmt; + + for (;;) + { + enum tree_code code; + + stmt = SSA_NAME_DEF_STMT (lhs); + + if (gimple_code (stmt) == GIMPLE_PHI) + return stmt; + + if (!is_gimple_assign (stmt) + || gimple_assign_lhs (stmt) != lhs) + return NULL; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) + return stmt; + + lhs = gimple_assign_rhs1 (stmt); + } +} + +/* Check if LHS = RHS statement is + LHS = *(ap.__base + ap.__offset + cst) + or + LHS = *(ap.__base + + ((ap.__offset + cst <= 47) + ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). + If the former, indicate that GPR registers are needed, + if the latter, indicate that FPR registers are needed. + + Also look for LHS = (*ptr).field, where ptr is one of the forms + listed above. 
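+
+   (Editorial note, not part of the original GCC source: in practice the
+   first form comes from integer va_arg reads such as va_arg (ap, int),
+   while the conditional second form is what the va_arg lowering in
+   alpha_gimplify_va_arg_1 generates for floating-point types such as
+   va_arg (ap, double), which read from the FP part of the register save
+   area.)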
+ + On alpha, cfun->va_list_gpr_size is used as size of the needed + regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR + registers are needed and bit 1 set if FPR registers are needed. + Return true if va_list references should not be scanned for the + current statement. */ + +static bool +alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) +{ + tree base, offset, rhs; + int offset_arg = 1; + gimple base_stmt; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) + != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); + while (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + if (TREE_CODE (rhs) != MEM_REF + || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) + return false; + + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL + || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) + return false; + + base = gimple_assign_rhs1 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + { + base = gimple_assign_rhs2 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + return false; + + offset_arg = 0; + } + + base = get_base_address (base); + if (TREE_CODE (base) != VAR_DECL + || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) + return false; + + offset = gimple_op (stmt, 1 + offset_arg); + if (TREE_CODE (offset) == SSA_NAME) + { + gimple offset_stmt = va_list_skip_additions (offset); + + if (offset_stmt + && gimple_code (offset_stmt) == GIMPLE_PHI) + { + HOST_WIDE_INT sub; + gimple arg1_stmt, arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; + + if (gimple_phi_num_args (offset_stmt) != 2) + goto escapes; + + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL + || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt)) + goto escapes; + + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + gimple tem = arg1_stmt; + code2 = code1; + arg1_stmt = arg2_stmt; + arg2_stmt = tem; + } + else + goto escapes; + + if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) + goto escapes; + + sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; + + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt + && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); + } + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) + goto escapes; + else + /* Need general regs. */ + cfun->va_list_fpr_size |= 1; + return false; + +escapes: + si->va_list_escapes = true; + return false; +} +#endif + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +static void +alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode, + tree type, int *pretend_size, int no_rtl) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); + + /* Skip the current argument. */ + targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type, + true); + +#if TARGET_ABI_OPEN_VMS + /* For VMS, we allocate space for all 6 arg registers plus a count. + + However, if NO registers need to be saved, don't allocate any space. + This is not only because we won't need the space, but because AP + includes the current_pretend_args_size and we don't want to mess up + any ap-relative addresses already made. */ + if (cum.num_args < 6) + { + if (!no_rtl) + { + emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx); + emit_insn (gen_arg_home ()); + } + *pretend_size = 7 * UNITS_PER_WORD; + } +#else + /* On OSF/1 and friends, we allocate space for all 12 arg registers, but + only push those that are remaining. However, if NO registers need to + be saved, don't allocate any space. This is not only because we won't + need the space, but because AP includes the current_pretend_args_size + and we don't want to mess up any ap-relative addresses already made. + + If we are not to use the floating-point registers, save the integer + registers where we would put the floating-point registers. This is + not the most efficient way to implement varargs with just one register + class, but it isn't worth doing anything more efficient in this rare + case. */ + if (cum >= 6) + return; + + if (!no_rtl) + { + int count; + alias_set_type set = get_varargs_alias_set (); + rtx tmp; + + count = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (count > 6 - cum) + count = 6 - cum; + + /* Detect whether integer registers or floating-point registers + are needed by the detected va_arg statements. See above for + how these values are computed. 
Note that the "escape" value + is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of + these bits set. */ + gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); + + if (cfun->va_list_fpr_size & 1) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + (cum + 6) * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum, tmp, count); + } + + if (cfun->va_list_fpr_size & 2) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + cum * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count); + } + } + *pretend_size = 12 * UNITS_PER_WORD; +#endif +} + +static void +alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT offset; + tree t, offset_field, base_field; + + if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) + return; + + /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base + up by 48, storing fp arg registers in the first 48 bytes, and the + integer arg registers in the next 48 bytes. This is only done, + however, if any integer registers need to be stored. + + If no integer registers need be stored, then we must subtract 48 + in order to account for the integer arg registers which are counted + in argsize above, but which are not actually stored on the stack. + Must further be careful here about structures straddling the last + integer argument register; that futzes with pretend_args_size, + which changes the meaning of AP. */ + + if (NUM_ARGS < 6) + offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD; + else + offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; + + if (TARGET_ABI_OPEN_VMS) + { + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + else + { + base_field = TYPE_FIELDS (TREE_TYPE (valist)); + offset_field = DECL_CHAIN (base_field); + + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } +} + +static tree +alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, + gimple_seq *pre_p) +{ + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; + + /* If the type could not be passed in registers, skip the block + reserved for the registers. 
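+
+   (Editorial note, not part of the original GCC source: the 6*8 constant
+   below is the 48-byte register save area, i.e. six 8-byte argument
+   registers; the MAX_EXPR pushes __offset past that area so such
+   arguments are fetched from the stack portion of the argument list,
+   while for REAL_TYPE arguments the conditional "offset - 48" further
+   down selects the FP half of the save area, which lives at negative
+   offsets from __base.)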
*/ + if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) + { + t = build_int_cst (TREE_TYPE (offset), 6*8); + gimplify_assign (offset, + build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + } + + addend = offset; + ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); + + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part, real_temp; + + real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. */ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + return build2 (COMPLEX_EXPR, type, real_temp, imag_part); + } + else if (TREE_CODE (type) == REAL_TYPE) + { + tree fpaddend, cond, fourtyeight; + + fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8); + fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), + addend, fourtyeight); + cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); + addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, + fpaddend, addend); + } + + /* Build the final address and force that value into a temporary. */ + addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); + internal_post = NULL; + gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); + gimple_seq_add_seq (pre_p, internal_post); + + /* Update the offset field. */ + type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); + if (type_size == NULL || TREE_OVERFLOW (type_size)) + t = size_zero_node; + else + { + t = size_binop (PLUS_EXPR, type_size, size_int (7)); + t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); + t = size_binop (MULT_EXPR, t, size_int (8)); + } + t = fold_convert (TREE_TYPE (offset), t); + gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + + return build_va_arg_indirect_ref (addr); +} + +static tree +alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree offset_field, base_field, offset, base, t, r; + bool indirect; + + if (TARGET_ABI_OPEN_VMS) + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + + base_field = TYPE_FIELDS (va_list_type_node); + offset_field = DECL_CHAIN (base_field); + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + /* Pull the fields of the structure out into temporaries. Since we never + modify the base field, we can use a formal temporary. Sign-extend the + offset field so that it's the proper width for pointer arithmetic. */ + base = get_formal_tmp_var (base_field, pre_p); + + t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type_for_mode (type, ptr_mode, true); + + /* Find the value. Note that this will be a stable indirection, or + a composite of stable indirections in the case of complex. */ + r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); + + /* Stuff the offset temporary back into its field. */ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), pre_p); + + if (indirect) + r = build_va_arg_indirect_ref (r); + + return r; +} + +/* Builtins. 
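+
+   (Editorial illustration, not part of the original GCC source: these
+   are the __builtin_alpha_* intrinsics; for example
+
+	unsigned long lo32 = __builtin_alpha_zapnot (v, 0x0f);
+
+   keeps the low four bytes of v and clears the rest, and the constant
+   folders later in this file evaluate such calls at compile time when
+   the operands are constants.)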
*/ + +enum alpha_builtin +{ + ALPHA_BUILTIN_CMPBGE, + ALPHA_BUILTIN_EXTBL, + ALPHA_BUILTIN_EXTWL, + ALPHA_BUILTIN_EXTLL, + ALPHA_BUILTIN_EXTQL, + ALPHA_BUILTIN_EXTWH, + ALPHA_BUILTIN_EXTLH, + ALPHA_BUILTIN_EXTQH, + ALPHA_BUILTIN_INSBL, + ALPHA_BUILTIN_INSWL, + ALPHA_BUILTIN_INSLL, + ALPHA_BUILTIN_INSQL, + ALPHA_BUILTIN_INSWH, + ALPHA_BUILTIN_INSLH, + ALPHA_BUILTIN_INSQH, + ALPHA_BUILTIN_MSKBL, + ALPHA_BUILTIN_MSKWL, + ALPHA_BUILTIN_MSKLL, + ALPHA_BUILTIN_MSKQL, + ALPHA_BUILTIN_MSKWH, + ALPHA_BUILTIN_MSKLH, + ALPHA_BUILTIN_MSKQH, + ALPHA_BUILTIN_UMULH, + ALPHA_BUILTIN_ZAP, + ALPHA_BUILTIN_ZAPNOT, + ALPHA_BUILTIN_AMASK, + ALPHA_BUILTIN_IMPLVER, + ALPHA_BUILTIN_RPCC, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, + + /* TARGET_MAX */ + ALPHA_BUILTIN_MINUB8, + ALPHA_BUILTIN_MINSB8, + ALPHA_BUILTIN_MINUW4, + ALPHA_BUILTIN_MINSW4, + ALPHA_BUILTIN_MAXUB8, + ALPHA_BUILTIN_MAXSB8, + ALPHA_BUILTIN_MAXUW4, + ALPHA_BUILTIN_MAXSW4, + ALPHA_BUILTIN_PERR, + ALPHA_BUILTIN_PKLB, + ALPHA_BUILTIN_PKWB, + ALPHA_BUILTIN_UNPKBL, + ALPHA_BUILTIN_UNPKBW, + + /* TARGET_CIX */ + ALPHA_BUILTIN_CTTZ, + ALPHA_BUILTIN_CTLZ, + ALPHA_BUILTIN_CTPOP, + + ALPHA_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = { + CODE_FOR_builtin_cmpbge, + CODE_FOR_extbl, + CODE_FOR_extwl, + CODE_FOR_extll, + CODE_FOR_extql, + CODE_FOR_extwh, + CODE_FOR_extlh, + CODE_FOR_extqh, + CODE_FOR_builtin_insbl, + CODE_FOR_builtin_inswl, + CODE_FOR_builtin_insll, + CODE_FOR_insql, + CODE_FOR_inswh, + CODE_FOR_inslh, + CODE_FOR_insqh, + CODE_FOR_mskbl, + CODE_FOR_mskwl, + CODE_FOR_mskll, + CODE_FOR_mskql, + CODE_FOR_mskwh, + CODE_FOR_msklh, + CODE_FOR_mskqh, + CODE_FOR_umuldi3_highpart, + CODE_FOR_builtin_zap, + CODE_FOR_builtin_zapnot, + CODE_FOR_builtin_amask, + CODE_FOR_builtin_implver, + CODE_FOR_builtin_rpcc, + CODE_FOR_builtin_establish_vms_condition_handler, + CODE_FOR_builtin_revert_vms_condition_handler, + + /* TARGET_MAX */ + CODE_FOR_builtin_minub8, + CODE_FOR_builtin_minsb8, + CODE_FOR_builtin_minuw4, + CODE_FOR_builtin_minsw4, + CODE_FOR_builtin_maxub8, + CODE_FOR_builtin_maxsb8, + CODE_FOR_builtin_maxuw4, + CODE_FOR_builtin_maxsw4, + CODE_FOR_builtin_perr, + CODE_FOR_builtin_pklb, + CODE_FOR_builtin_pkwb, + CODE_FOR_builtin_unpkbl, + CODE_FOR_builtin_unpkbw, + + /* TARGET_CIX */ + CODE_FOR_ctzdi2, + CODE_FOR_clzdi2, + CODE_FOR_popcountdi2 +}; + +struct alpha_builtin_def +{ + const char *name; + enum alpha_builtin code; + unsigned int target_mask; + bool is_const; +}; + +static struct alpha_builtin_def const zero_arg_builtins[] = { + { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true }, + { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false } +}; + +static struct alpha_builtin_def const one_arg_builtins[] = { + { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true }, + { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true }, + { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true }, + { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true }, + { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true }, + { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true }, + { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true }, + { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true } +}; + +static struct alpha_builtin_def const two_arg_builtins[] = { + { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true }, + { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true }, + { 
"__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true }, + { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true }, + { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true }, + { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true }, + { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true }, + { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true }, + { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true }, + { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true }, + { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true }, + { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true }, + { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true }, + { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true }, + { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true }, + { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true }, + { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true }, + { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true }, + { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true }, + { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true }, + { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true }, + { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true }, + { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true }, + { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true }, + { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true }, + { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true }, + { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true }, + { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true }, + { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true }, + { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true }, + { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true }, + { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true }, + { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true }, + { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true } +}; + +static GTY(()) tree alpha_dimode_u; +static GTY(()) tree alpha_v8qi_u; +static GTY(()) tree alpha_v8qi_s; +static GTY(()) tree alpha_v4hi_u; +static GTY(()) tree alpha_v4hi_s; + +static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max]; + +/* Return the alpha builtin for CODE. */ + +static tree +alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ALPHA_BUILTIN_max) + return error_mark_node; + return alpha_builtins[code]; +} + +/* Helper function of alpha_init_builtins. Add the built-in specified + by NAME, TYPE, CODE, and ECF. */ + +static void +alpha_builtin_function (const char *name, tree ftype, + enum alpha_builtin code, unsigned ecf) +{ + tree decl = add_builtin_function (name, ftype, (int) code, + BUILT_IN_MD, NULL, NULL_TREE); + + if (ecf & ECF_CONST) + TREE_READONLY (decl) = 1; + if (ecf & ECF_NOTHROW) + TREE_NOTHROW (decl) = 1; + + alpha_builtins [(int) code] = decl; +} + +/* Helper function of alpha_init_builtins. Add the COUNT built-in + functions pointed to by P, with function type FTYPE. */ + +static void +alpha_add_builtins (const struct alpha_builtin_def *p, size_t count, + tree ftype) +{ + size_t i; + + for (i = 0; i < count; ++i, ++p) + if ((target_flags & p->target_mask) == p->target_mask) + alpha_builtin_function (p->name, ftype, p->code, + (p->is_const ? 
ECF_CONST : 0) | ECF_NOTHROW); +} + +static void +alpha_init_builtins (void) +{ + tree ftype; + + alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); + alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); + alpha_v8qi_s = build_vector_type (intQI_type_node, 8); + alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); + alpha_v4hi_s = build_vector_type (intHI_type_node, 4); + + ftype = build_function_type_list (alpha_dimode_u, NULL_TREE); + alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); + + ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE); + alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); + + ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, + alpha_dimode_u, NULL_TREE); + alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); + + if (TARGET_ABI_OPEN_VMS) + { + ftype = build_function_type_list (ptr_type_node, ptr_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_establish_vms_condition_handler", + ftype, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + 0); + + ftype = build_function_type_list (ptr_type_node, void_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0); + + vms_patch_builtins (); + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +alpha_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg; + call_expr_arg_iterator iter; + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= ALPHA_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + +/* Several bits below assume HWI >= 64 bits. This should be enforced + by config.gcc. 
*/ +#if HOST_BITS_PER_WIDE_INT < 64 +# error "HOST_WIDE_INT too small" +#endif + +/* Fold the builtin for the CMPBGE instruction. This is a vector comparison + with an 8-bit output vector. OPINT contains the integer operands; bit N + of OP_CONST is set if OPINT[N] is valid. */ + +static tree +alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) +{ + if (op_const == 3) + { + int i, val; + for (i = 0, val = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; + if (c0 >= c1) + val |= 1 << i; + } + return build_int_cst (alpha_dimode_u, val); + } + else if (op_const == 2 && opint[1] == 0) + return build_int_cst (alpha_dimode_u, 0xff); + return NULL; +} + +/* Fold the builtin for the ZAPNOT instruction. This is essentially a + specialized form of an AND operation. Other byte manipulation instructions + are defined in terms of this instruction, so this is also used as a + subroutine for other builtins. + + OP contains the tree operands; OPINT contains the extracted integer values. + Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only + OPINT may be considered. */ + +static tree +alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT mask = 0; + int i; + + for (i = 0; i < 8; ++i) + if ((opint[1] >> i) & 1) + mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8); + + if (op_const & 1) + return build_int_cst (alpha_dimode_u, opint[0] & mask); + + if (op) + return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0], + build_int_cst (alpha_dimode_u, mask)); + } + else if ((op_const & 1) && opint[0] == 0) + return build_int_cst (alpha_dimode_u, 0); + return NULL; +} + +/* Fold the builtins for the EXT family of instructions. */ + +static tree +alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + long zap_const = 2; + tree *zap_op = NULL; + + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + loc *= BITS_PER_UNIT; + + if (loc != 0) + { + if (op_const & 1) + { + unsigned HOST_WIDE_INT temp = opint[0]; + if (is_high) + temp <<= loc; + else + temp >>= loc; + opint[0] = temp; + zap_const = 3; + } + } + else + zap_op = op; + } + + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, zap_const); +} + +/* Fold the builtins for the INS family of instructions. 
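As a quick host-side illustration of what the two folds above compute (a plain-C sketch, separate from the GCC sources): CMPBGE produces one result bit per byte lane, and ZAPNOT expands an 8-bit byte-select mask into a 64-bit AND mask.

#include <stdint.h>

/* Reference model mirroring alpha_fold_builtin_cmpbge: bit i of the
   result is set when byte i of A is (unsigned) >= byte i of B.  */
static uint64_t
ref_cmpbge (uint64_t a, uint64_t b)
{
  uint64_t res = 0;
  for (int i = 0; i < 8; i++)
    if (((a >> (i * 8)) & 0xff) >= ((b >> (i * 8)) & 0xff))
      res |= (uint64_t) 1 << i;
  return res;
}

/* Reference model mirroring alpha_fold_builtin_zapnot: each set bit in
   the low 8 bits of MASK keeps the corresponding byte of A.  */
static uint64_t
ref_zapnot (uint64_t a, uint64_t mask)
{
  uint64_t keep = 0;
  for (int i = 0; i < 8; i++)
    if ((mask >> i) & 1)
      keep |= (uint64_t) 0xff << (i * 8);
  return a & keep;
}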
*/ + +static tree +alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if ((op_const & 1) && opint[0] == 0) + return build_int_cst (alpha_dimode_u, 0); + + if (op_const & 2) + { + unsigned HOST_WIDE_INT temp, loc, byteloc; + tree *zap_op = NULL; + + loc = opint[1] & 7; + bytemask <<= loc; + + temp = opint[0]; + if (is_high) + { + byteloc = (64 - (loc * 8)) & 0x3f; + if (byteloc == 0) + zap_op = op; + else + temp >>= byteloc; + bytemask >>= 8; + } + else + { + byteloc = loc * 8; + if (byteloc == 0) + zap_op = op; + else + temp <<= byteloc; + } + + opint[0] = temp; + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, op_const); + } + + return NULL; +} + +static tree +alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + bytemask <<= loc; + + if (is_high) + bytemask >>= 8; + + opint[1] = bytemask ^ 0xff; + } + + return alpha_fold_builtin_zapnot (op, opint, op_const); +} + +static tree +alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) +{ + tree op0 = fold_convert (vtype, op[0]); + tree op1 = fold_convert (vtype, op[1]); + tree val = fold_build2 (code, vtype, op0, op1); + return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val); +} + +static tree +alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp = 0; + int i; + + if (op_const != 3) + return NULL; + + for (i = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; + if (a >= b) + temp += a - b; + else + temp += b - a; + } + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 24) & 0xff00; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 8) & 0xff00; + temp |= (opint[0] >> 16) & 0xff0000; + temp |= (opint[0] >> 24) & 0xff000000; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0xff00) << 24; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0x0000ff00) << 8; + temp |= (opint[0] & 0x00ff0000) << 16; + temp |= (opint[0] & 0xff000000) << 24; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = exact_log2 (opint[0] & -opint[0]); + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned 
HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = 64 - floor_log2 (opint[0]) - 1; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp, op; + + if (op_const == 0) + return NULL; + + op = opint[0]; + temp = 0; + while (op) + temp++, op &= op - 1; + + return build_int_cst (alpha_dimode_u, temp); +} + +/* Fold one of our builtin functions. */ + +static tree +alpha_fold_builtin (tree fndecl, int n_args, tree *op, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT opint[MAX_ARGS]; + long op_const = 0; + int i; + + if (n_args > MAX_ARGS) + return NULL; + + for (i = 0; i < n_args; i++) + { + tree arg = op[i]; + if (arg == error_mark_node) + return NULL; + + opint[i] = 0; + if (TREE_CODE (arg) == INTEGER_CST) + { + op_const |= 1L << i; + opint[i] = int_cst_value (arg); + } + } + + switch (DECL_FUNCTION_CODE (fndecl)) + { + case ALPHA_BUILTIN_CMPBGE: + return alpha_fold_builtin_cmpbge (opint, op_const); + + case ALPHA_BUILTIN_EXTBL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_EXTWL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_EXTLL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_EXTQL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_EXTWH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_EXTLH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_EXTQH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_INSBL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_INSWL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_INSLL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_INSQL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_INSWH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_INSLH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_INSQH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_MSKBL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_MSKWL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_MSKLL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_MSKQL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_MSKWH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_MSKLH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_MSKQH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_UMULH: + return fold_build2 (MULT_HIGHPART_EXPR, alpha_dimode_u, op[0], op[1]); + + case ALPHA_BUILTIN_ZAP: + opint[1] ^= 0xff; + /* FALLTHRU */ + case ALPHA_BUILTIN_ZAPNOT: + return alpha_fold_builtin_zapnot (op, opint, op_const); + + case ALPHA_BUILTIN_MINUB8: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MINSB8: + return 
alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MINUW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MINSW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s); + case ALPHA_BUILTIN_MAXUB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MAXSB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MAXUW4: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MAXSW4: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s); + + case ALPHA_BUILTIN_PERR: + return alpha_fold_builtin_perr (opint, op_const); + case ALPHA_BUILTIN_PKLB: + return alpha_fold_builtin_pklb (opint, op_const); + case ALPHA_BUILTIN_PKWB: + return alpha_fold_builtin_pkwb (opint, op_const); + case ALPHA_BUILTIN_UNPKBL: + return alpha_fold_builtin_unpkbl (opint, op_const); + case ALPHA_BUILTIN_UNPKBW: + return alpha_fold_builtin_unpkbw (opint, op_const); + + case ALPHA_BUILTIN_CTTZ: + return alpha_fold_builtin_cttz (opint, op_const); + case ALPHA_BUILTIN_CTLZ: + return alpha_fold_builtin_ctlz (opint, op_const); + case ALPHA_BUILTIN_CTPOP: + return alpha_fold_builtin_ctpop (opint, op_const); + + case ALPHA_BUILTIN_AMASK: + case ALPHA_BUILTIN_IMPLVER: + case ALPHA_BUILTIN_RPCC: + /* None of these are foldable at compile-time. */ + default: + return NULL; + } +} + +/* This page contains routines that are used to determine what the function + prologue and epilogue code will do and write them out. */ + +/* Compute the size of the save area in the stack. */ + +/* These variables are used for communication between the following functions. + They indicate various things about the current function being compiled + that are used to tell what kind of prologue, epilogue and procedure + descriptor to generate. */ + +/* Nonzero if we need a stack procedure. */ +enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2}; +static enum alpha_procedure_types alpha_procedure_type; + +/* Register number (either FP or SP) that is used to unwind the frame. */ +static int vms_unwind_regno; + +/* Register number used to save FP. We need not have one for RA since + we don't modify it for register procedures. This is only defined + for register frame procedures. */ +static int vms_save_fp_regno; + +/* Register number used to reference objects off our PV. */ +static int vms_base_regno; + +/* Compute register masks for saved registers. */ + +static void +alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + unsigned int i; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (cfun->is_thunk) + { + *imaskP = 0; + *fmaskP = 0; + return; + } + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + imask |= (1UL << HARD_FRAME_POINTER_REGNUM); + + /* One for every register we have to save. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (! fixed_regs[i] && ! call_used_regs[i] + && df_regs_ever_live_p (i) && i != REG_RA) + { + if (i < 32) + imask |= (1UL << i); + else + fmask |= (1UL << (i - 32)); + } + + /* We need to restore these for the handler. 
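The CTTZ and CTPOP folds defined just above follow the Alpha convention that a zero input yields 64. A host-side sketch of the same arithmetic, for illustration only:

#include <stdint.h>

/* Mirrors alpha_fold_builtin_cttz and alpha_fold_builtin_ctpop: count
   trailing zeros (with cttz(0) defined as 64) and population count via
   the classic clear-lowest-set-bit loop.  */
static unsigned
ref_cttz (uint64_t x)
{
  unsigned n = 0;
  if (x == 0)
    return 64;
  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

static unsigned
ref_ctpop (uint64_t x)
{
  unsigned n = 0;
  for (; x != 0; x &= x - 1)   /* clears the lowest set bit */
    n++;
  return n;
}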
*/ + if (crtl->calls_eh_return) + { + for (i = 0; ; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1UL << regno; + } + } + + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (imask || fmask || alpha_ra_ever_killed ()) + imask |= (1UL << REG_RA); + + *imaskP = imask; + *fmaskP = fmask; +} + +int +alpha_sa_size (void) +{ + unsigned long mask[2]; + int sa_size = 0; + int i, j; + + alpha_sa_mask (&mask[0], &mask[1]); + + for (j = 0; j < 2; ++j) + for (i = 0; i < 32; ++i) + if ((mask[j] >> i) & 1) + sa_size++; + + if (TARGET_ABI_OPEN_VMS) + { + /* Start with a stack procedure if we make any calls (REG_RA used), or + need a frame pointer, with a register procedure if we otherwise need + at least a slot, and with a null procedure in other cases. */ + if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed) + alpha_procedure_type = PT_STACK; + else if (get_frame_size() != 0) + alpha_procedure_type = PT_REGISTER; + else + alpha_procedure_type = PT_NULL; + + /* Don't reserve space for saving FP & RA yet. Do that later after we've + made the final decision on stack procedure vs register procedure. */ + if (alpha_procedure_type == PT_STACK) + sa_size -= 2; + + /* Decide whether to refer to objects off our PV via FP or PV. + If we need FP for something else or if we receive a nonlocal + goto (which expects PV to contain the value), we must use PV. + Otherwise, start by assuming we can use FP. */ + + vms_base_regno + = (frame_pointer_needed + || cfun->has_nonlocal_label + || alpha_procedure_type == PT_STACK + || crtl->outgoing_args_size) + ? REG_PV : HARD_FRAME_POINTER_REGNUM; + + /* If we want to copy PV into FP, we need to find some register + in which to save FP. */ + + vms_save_fp_regno = -1; + if (vms_base_regno == HARD_FRAME_POINTER_REGNUM) + for (i = 0; i < 32; i++) + if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i)) + vms_save_fp_regno = i; + + /* A VMS condition handler requires a stack procedure in our + implementation. (not required by the calling standard). */ + if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER) + || cfun->machine->uses_condition_handler) + vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK; + else if (alpha_procedure_type == PT_NULL) + vms_base_regno = REG_PV; + + /* Stack unwinding should be done via FP unless we use it for PV. */ + vms_unwind_regno = (vms_base_regno == REG_PV + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); + + /* If this is a stack procedure, allow space for saving FP, RA and + a condition handler slot if needed. */ + if (alpha_procedure_type == PT_STACK) + sa_size += 2 + cfun->machine->uses_condition_handler; + } + else + { + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + } + + return sa_size * 8; +} + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. 
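For the non-VMS path, the size computation above reduces to one 8-byte slot per saved register, padded so the save area stays a multiple of 16 bytes. A sketch with hypothetical masks:

/* Host-side restatement of alpha_sa_size for the non-VMS case; IMASK and
   FMASK are the integer and FP save masks computed by alpha_sa_mask.  */
static int
example_sa_size (unsigned long imask, unsigned long fmask)
{
  int slots = 0;

  for (int i = 0; i < 32; i++)
    slots += ((imask >> i) & 1) + ((fmask >> i) & 1);

  if (slots & 1)
    slots++;            /* pad to an even slot count, i.e. 16 bytes */

  return slots * 8;     /* size in bytes */
}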
*/ + +HOST_WIDE_INT +alpha_initial_elimination_offset (unsigned int from, + unsigned int to ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT ret; + + ret = alpha_sa_size (); + ret += ALPHA_ROUND (crtl->outgoing_args_size); + + switch (from) + { + case FRAME_POINTER_REGNUM: + break; + + case ARG_POINTER_REGNUM: + ret += (ALPHA_ROUND (get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + + return ret; +} + +#if TARGET_ABI_OPEN_VMS + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +static bool +alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + /* We need the alpha_procedure_type to decide. Evaluate it now. */ + alpha_sa_size (); + + switch (alpha_procedure_type) + { + case PT_NULL: + /* NULL procedures have no frame of their own and we only + know how to resolve from the current stack pointer. */ + return to == STACK_POINTER_REGNUM; + + case PT_REGISTER: + case PT_STACK: + /* We always eliminate except to the stack pointer if there is no + usable frame pointer at hand. */ + return (to != STACK_POINTER_REGNUM + || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM); + } + + gcc_unreachable (); +} + +/* FROM is to be eliminated for TO. Return the offset so that TO+offset + designates the same location as FROM. */ + +HOST_WIDE_INT +alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to) +{ + /* The only possible attempts we ever expect are ARG or FRAME_PTR to + HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide + on the proper computations and will need the register save area size + in most cases. */ + + HOST_WIDE_INT sa_size = alpha_sa_size (); + + /* PT_NULL procedures have no frame of their own and we only allow + elimination to the stack pointer. This is the argument pointer and we + resolve the soft frame pointer to that as well. */ + + if (alpha_procedure_type == PT_NULL) + return 0; + + /* For a PT_STACK procedure the frame layout looks as follows + + -----> decreasing addresses + + < size rounded up to 16 | likewise > + --------------#------------------------------+++--------------+++-------# + incoming args # pretended args | "frame" | regs sa | PV | outgoing args # + --------------#---------------------------------------------------------# + ^ ^ ^ ^ + ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR + + + PT_REGISTER procedures are similar in that they may have a frame of their + own. They have no regs-sa/pv/outgoing-args area. + + We first compute offset to HARD_FRAME_PTR, then add what we need to get + to STACK_PTR if need be. */ + + { + HOST_WIDE_INT offset; + HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 
8 : 0; + + switch (from) + { + case FRAME_POINTER_REGNUM: + offset = ALPHA_ROUND (sa_size + pv_save_size); + break; + case ARG_POINTER_REGNUM: + offset = (ALPHA_ROUND (sa_size + pv_save_size + + get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + default: + gcc_unreachable (); + } + + if (to == STACK_POINTER_REGNUM) + offset += ALPHA_ROUND (crtl->outgoing_args_size); + + return offset; + } +} + +#define COMMON_OBJECT "common_object" + +static tree +common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs ATTRIBUTE_UNUSED) +{ + tree decl = *node; + gcc_assert (DECL_P (decl)); + + DECL_COMMON (decl) = 1; + return NULL_TREE; +} + +static const struct attribute_spec vms_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +void +vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + tree attr = DECL_ATTRIBUTES (decl); + fprintf (file, "%s", COMMON_ASM_OP); + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size); + /* ??? Unlike on OSF/1, the alignment factor is not in log units. */ + fprintf (file, ",%u", align / BITS_PER_UNIT); + if (attr) + { + attr = lookup_attribute (COMMON_OBJECT, attr); + if (attr) + fprintf (file, ",%s", + IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)))); + } + fputc ('\n', file); +} + +#undef COMMON_OBJECT + +#endif + +static int +find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx; +} + +int +alpha_find_lo_sum_using_gp (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0; +} + +static int +alpha_does_function_need_gp (void) +{ + rtx insn; + + /* The GP being variable is an OSF abi thing. */ + if (! TARGET_ABI_OSF) + return 0; + + /* We need the gp to load the address of __mcount. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + return 1; + + /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */ + if (cfun->is_thunk) + return 1; + + /* The nonlocal receiver pattern assumes that the gp is valid for + the nested function. Reasonable because it's almost always set + correctly already. For the cases where that's wrong, make sure + the nested function loads its gp on entry. */ + if (crtl->has_nonlocal_goto) + return 1; + + /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. + Even if we are a static function, we still need to do this in case + our address is taken and passed to something like qsort. */ + + push_topmost_sequence (); + insn = get_insns (); + pop_topmost_sequence (); + + for (; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_usegp (insn)) + return 1; + + return 0; +} + + +/* Helper function to set RTX_FRAME_RELATED_P on instructions, including + sequences. 
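Tying the PT_STACK layout diagram above to numbers, here is a sketch of the offset arithmetic, under the assumption (stated by the surrounding comments) that ALPHA_ROUND rounds up to a 16-byte multiple:

/* Hypothetical restatement of alpha_vms_initial_elimination_offset for a
   PT_STACK procedure: FRAME_PTR is separated from HARD_FRAME_PTR by the
   register save area plus the 8-byte PV slot, ARG_PTR additionally by
   the local frame, and eliminating to STACK_PTR adds the outgoing-args
   area.  */
#define ROUND16(X) (((X) + 15) & ~15)

static long
example_vms_offset (long sa_size, long frame, long pretend, long outgoing,
                    int from_is_arg_ptr, int to_is_sp)
{
  long pv_save_size = 8;                        /* PT_STACK reserves PV */
  long offset = ROUND16 (sa_size + pv_save_size);

  if (from_is_arg_ptr)
    offset = ROUND16 (sa_size + pv_save_size + frame + pretend) - pretend;

  if (to_is_sp)
    offset += ROUND16 (outgoing);

  return offset;
}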
*/ + +static rtx +set_frame_related_p (void) +{ + rtx seq = get_insns (); + rtx insn; + + end_sequence (); + + if (!seq) + return NULL_RTX; + + if (INSN_P (seq)) + { + insn = seq; + while (insn != NULL_RTX) + { + RTX_FRAME_RELATED_P (insn) = 1; + insn = NEXT_INSN (insn); + } + seq = emit_insn (seq); + } + else + { + seq = emit_insn (seq); + RTX_FRAME_RELATED_P (seq) = 1; + } + return seq; +} + +#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) + +/* Generates a store with the proper unwind info attached. VALUE is + stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG + contains SP+FRAME_BIAS, and that is the unwind info that should be + generated. If FRAME_REG != VALUE, then VALUE is being stored on + behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ + +static void +emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs, rtx frame_reg) +{ + rtx addr, mem, insn; + + addr = plus_constant (Pmode, base_reg, base_ofs); + mem = gen_frame_mem (DImode, addr); + + insn = emit_move_insn (mem, value); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_bias || value != frame_reg) + { + if (frame_bias) + { + addr = plus_constant (Pmode, stack_pointer_rtx, + frame_bias + base_ofs); + mem = gen_rtx_MEM (DImode, addr); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, mem, frame_reg)); + } +} + +static void +emit_frame_store (unsigned int regno, rtx base_reg, + HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (DImode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +/* Compute the frame size. SIZE is the size of the "naked" frame + and SA_SIZE is the size of the register save area. */ + +static HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) +{ + if (TARGET_ABI_OPEN_VMS) + return ALPHA_ROUND (sa_size + + (alpha_procedure_type == PT_STACK ? 8 : 0) + + size + + crtl->args.pretend_args_size); + else + return ALPHA_ROUND (crtl->outgoing_args_size) + + sa_size + + ALPHA_ROUND (size + + crtl->args.pretend_args_size); +} + +/* Write function prologue. */ + +/* On vms we have two kinds of functions: + + - stack frame (PROC_STACK) + these are 'normal' functions with local vars and which are + calling other functions + - register frame (PROC_REGISTER) + keeps all data in registers, needs no stack + + We must pass this to the assembler so it can generate the + proper pdsc (procedure descriptor) + This is done with the '.pdesc' command. + + On not-vms, we don't really differentiate between the two, as we can + simply allocate stack without saving registers. */ + +void +alpha_expand_prologue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size, sa_bias; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Probed stack size; it additionally includes the size of + the "reserve region" if any. */ + HOST_WIDE_INT probed_size; + /* Offset from base reg to register save area. 
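Spelled out with host arithmetic, compute_frame_size above combines the pieces as follows; this is a sketch that again assumes ALPHA_ROUND means round up to 16 bytes:

/* Restatement of compute_frame_size for both ABIs; all quantities are
   byte counts and the inputs are hypothetical.  */
#define ROUND16(X) (((X) + 15) & ~15)

static long
example_frame_size (int vms, int pt_stack, long size, long sa_size,
                    long pretend_args, long outgoing_args)
{
  if (vms)
    return ROUND16 (sa_size + (pt_stack ? 8 : 0) + size + pretend_args);

  return ROUND16 (outgoing_args) + sa_size + ROUND16 (size + pretend_args);
}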
*/ + HOST_WIDE_INT reg_offset; + rtx sa_reg; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Emit an insn to reload GP, if needed. */ + if (TARGET_ABI_OSF) + { + alpha_function_needs_gp = alpha_does_function_need_gp (); + if (alpha_function_needs_gp) + emit_insn (gen_prologue_ldgp ()); + } + + /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert + the call to mcount ourselves, rather than having the linker do it + magically in response to -pg. Since _mcount has special linkage, + don't represent the call as a call. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + emit_insn (gen_prologue_mcount ()); + + /* Adjust the stack by the frame size. If the frame size is > 4096 + bytes, we need to be sure we probe somewhere in the first and last + 4096 bytes (we can probably get away without the latter test) and + every 8192 bytes in between. If the frame size is > 32768, we + do this in a loop. Otherwise, we generate the explicit probe + instructions. + + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; + if (flag_stack_check) + probed_size += STACK_CHECK_PROTECT; + + if (probed_size <= 32768) + { + if (probed_size > 4096) + { + int probed; + + for (probed = 4096; probed < probed_size; probed += 8192) + emit_insn (gen_probe_stack (GEN_INT (-probed))); + + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) + || flag_stack_check) + emit_insn (gen_probe_stack (GEN_INT (-probed_size))); + } + + if (frame_size != 0) + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + } + else + { + /* Here we generate code to set R22 to SP + 4096 and set R23 to the + number of 8192 byte blocks to probe. We then probe each block + in the loop and then set SP to the proper location. If the + amount remaining is > 4096, we have to do one more probe if we + are not saving any registers or if we are probing beyond the + frame because of -fstack-check. */ + + HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; + HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; + rtx ptr = gen_rtx_REG (DImode, 22); + rtx count = gen_rtx_REG (DImode, 23); + rtx seq; + + emit_move_insn (count, GEN_INT (blocks)); + emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); + + /* Because of the difficulty in emitting a new basic block this + late in the compilation, generate the loop as a single insn. */ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + + if ((leftover > 4096 && sa_size == 0) || flag_stack_check) + { + rtx last = gen_rtx_MEM (DImode, + plus_constant (Pmode, ptr, -leftover)); + MEM_VOLATILE_P (last) = 1; + emit_move_insn (last, const0_rtx); + } + + if (flag_stack_check) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, + because the probed stack size is not equal to the frame size. 
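To make the probing policy described above concrete, a sketch (separate from the patch) of the offsets the moderate-frame branch touches:

#include <stdio.h>

/* Probe schedule for probed_size <= 32768, mirroring the code above: one
   probe every 8192 bytes starting 4096 bytes into the frame, plus a
   final probe at the full size when no registers are saved or
   -fstack-check is in effect.  */
static void
example_probe_schedule (long probed_size, long sa_size, int stack_check)
{
  long probed = 4096;

  if (probed_size <= 4096)
    return;                       /* small frames need no explicit probe */

  for (; probed < probed_size; probed += 8192)
    printf ("probe at SP - %ld\n", probed);

  if ((sa_size == 0 && probed_size > probed - 4096) || stack_check)
    printf ("probe at SP - %ld\n", probed_size);
}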
*/ + HOST_WIDE_INT lo, hi; + lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + hi = frame_size - lo; + + emit_move_insn (ptr, GEN_INT (hi)); + emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); + seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, + ptr)); + } + else + { + seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr, + GEN_INT (-leftover))); + } + + /* This alternative is special, because the DWARF code cannot + possibly intuit through the loop above. So we invent this + note it looks at instead. */ + RTX_FRAME_RELATED_P (seq) = 1; + add_reg_note (seq, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -frame_size))); + } + + /* Cope with very large offsets to the register save area. */ + sa_bias = 0; + sa_reg = stack_pointer_rtx; + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + rtx sa_bias_rtx; + + if (low + sa_size <= 0x8000) + sa_bias = reg_offset - low, reg_offset = low; + else + sa_bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 24); + sa_bias_rtx = GEN_INT (sa_bias); + + if (add_operand (sa_bias_rtx, DImode)) + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); + else + { + emit_move_insn (sa_reg, sa_bias_rtx); + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); + } + } + + /* Save regs in stack order. Beginning with VMS PV. */ + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0); + + /* Save register RA next. */ + if (imask & (1UL << REG_RA)) + { + emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset); + imask &= ~(1UL << REG_RA); + reg_offset += 8; + } + + /* Now save any other registers required to be saved. */ + for (i = 0; i < 31; i++) + if (imask & (1UL << i)) + { + emit_frame_store (i, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + + for (i = 0; i < 31; i++) + if (fmask & (1UL << i)) + { + emit_frame_store (i+32, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + + if (TARGET_ABI_OPEN_VMS) + { + /* Register frame procedures save the fp. */ + if (alpha_procedure_type == PT_REGISTER) + { + rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno), + hard_frame_pointer_rtx); + add_reg_note (insn, REG_CFA_REGISTER, NULL); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV) + emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno), + gen_rtx_REG (DImode, REG_PV))); + + if (alpha_procedure_type != PT_NULL + && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + + /* If we have to allocate space for outgoing args, do it now. */ + if (crtl->outgoing_args_size != 0) + { + rtx seq + = emit_move_insn (stack_pointer_rtx, + plus_constant + (Pmode, hard_frame_pointer_rtx, + - (ALPHA_ROUND + (crtl->outgoing_args_size)))); + + /* Only set FRAME_RELATED_P on the stack adjustment we just emitted + if ! frame_pointer_needed. Setting the bit will change the CFA + computation rule to use sp again, which would be wrong if we had + frame_pointer_needed, as this means sp might move unpredictably + later on. + + Also, note that + frame_pointer_needed + => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM + and + crtl->outgoing_args_size != 0 + => alpha_procedure_type != PT_NULL, + + so when we are not setting the bit here, we are guaranteed to + have emitted an FRP frame pointer update just before. 
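The ((x & 0xffff) ^ 0x8000) - 0x8000 pattern used twice above peels a signed 16-bit displacement off an offset so the remainder can be added through a register. A small self-check, illustrative only:

#include <assert.h>
#include <stdint.h>

/* Splits OFFSET into a signed 16-bit low part and a 0x10000-aligned
   bias, the same decomposition the prologue applies to the save-area
   base register.  */
static void
example_split_offset (int64_t offset)
{
  int64_t low  = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  int64_t bias = offset - low;

  assert (low >= -0x8000 && low <= 0x7fff);   /* fits a 16-bit displacement */
  assert ((bias & 0xffff) == 0);              /* bias is 0x10000-aligned */
  assert (bias + low == offset);              /* nothing lost */
}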
*/ + RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; + } + } + else + { + /* If we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed) + { + if (TARGET_CAN_FAULT_IN_PROLOGUE) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + else + /* This must always be the last instruction in the + prologue, thus we emit a special move + clobber. */ + FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, + stack_pointer_rtx, sa_reg))); + } + } + + /* The ABIs for VMS and OSF/1 say that while we can schedule insns into + the prologue, for exception handling reasons, we cannot do this for + any insn that might fault. We could prevent this for mems with a + (clobber:BLK (scratch)), but this doesn't work for fp insns. So we + have to prevent all such scheduling with a blockage. + + Linux, on the other hand, never bothered to implement OSF/1's + exception handling, and so doesn't care about such things. Anyone + planning to use dwarf2 frame-unwind info can also omit the blockage. */ + + if (! TARGET_CAN_FAULT_IN_PROLOGUE) + emit_insn (gen_blockage ()); +} + +/* Count the number of .file directives, so that .loc is up to date. */ +int num_source_filenames = 0; + +/* Output the textual info surrounding the prologue. */ + +void +alpha_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + unsigned HOST_WIDE_INT frame_size; + /* The maximum debuggable frame size. */ + unsigned HOST_WIDE_INT max_frame_size = 1UL << 31; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + char *entry_label = (char *) alloca (strlen (fnname) + 6); + char *tramp_label = (char *) alloca (strlen (fnname) + 6); + int i; + +#if TARGET_ABI_OPEN_VMS + vms_start_function (fnname); +#endif + + alpha_fnname = fnname; + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Issue function start and label. */ + if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive) + { + fputs ("\t.ent ", file); + assemble_name (file, fnname); + putc ('\n', file); + + /* If the function needs GP, we'll write the "..ng" label there. + Otherwise, do it here. */ + if (TARGET_ABI_OSF + && ! alpha_function_needs_gp + && ! cfun->is_thunk) + { + putc ('$', file); + assemble_name (file, fnname); + fputs ("..ng:\n", file); + } + } + /* Nested functions on VMS that are potentially called via trampoline + get a special transfer entry point that loads the called functions + procedure descriptor and static chain. 
*/ + if (TARGET_ABI_OPEN_VMS + && !TREE_PUBLIC (decl) + && DECL_CONTEXT (decl) + && !TYPE_P (DECL_CONTEXT (decl)) + && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL) + { + strcpy (tramp_label, fnname); + strcat (tramp_label, "..tr"); + ASM_OUTPUT_LABEL (file, tramp_label); + fprintf (file, "\tldq $1,24($27)\n"); + fprintf (file, "\tldq $27,16($27)\n"); + } + + strcpy (entry_label, fnname); + if (TARGET_ABI_OPEN_VMS) + strcat (entry_label, "..en"); + + ASM_OUTPUT_LABEL (file, entry_label); + inside_function = TRUE; + + if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.base $%d\n", vms_base_regno); + + if (TARGET_ABI_OSF + && TARGET_IEEE_CONFORMANT + && !flag_inhibit_size_directive) + { + /* Set flags in procedure descriptor to request IEEE-conformant + math-library routines. The value we set it to is PDSC_EXC_IEEE + (/usr/include/pdsc.h). */ + fputs ("\t.eflag 48\n", file); + } + + /* Set up offsets to alpha virtual arg/local debugging pointer. */ + alpha_auto_offset = -frame_size + crtl->args.pretend_args_size; + alpha_arg_offset = -frame_size + 48; + + /* Describe our frame. If the frame size is larger than an integer, + print it as zero to avoid an assembler error. We won't be + properly describing such a frame, but that's the best we can do. */ + if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26," + HOST_WIDE_INT_PRINT_DEC "\n", + vms_unwind_regno, + frame_size >= (1UL << 31) ? 0 : frame_size, + reg_offset); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", + (frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM), + frame_size >= max_frame_size ? 0 : frame_size, + crtl->args.pretend_args_size); + + /* Describe which registers were spilled. */ + if (TARGET_ABI_OPEN_VMS) + { + if (imask) + /* ??? Does VMS care if mask contains ra? The old code didn't + set it, so I don't here. */ + fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); + if (fmask) + fprintf (file, "\t.fmask 0x%lx,0\n", fmask); + if (alpha_procedure_type == PT_REGISTER) + fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); + } + else if (!flag_inhibit_size_directive) + { + if (imask) + { + fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + + for (i = 0; i < 32; ++i) + if (imask & (1UL << i)) + reg_offset += 8; + } + + if (fmask) + fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + } + +#if TARGET_ABI_OPEN_VMS + /* If a user condition handler has been installed at some point, emit + the procedure descriptor bits to point the Condition Handling Facility + at the indirection wrapper, and state the fp offset at which the user + handler may be found. */ + if (cfun->machine->uses_condition_handler) + { + fprintf (file, "\t.handler __gcc_shell_handler\n"); + fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); + } + +#ifdef TARGET_VMS_CRASH_DEBUG + /* Support of minimal traceback info. */ + switch_to_section (readonly_data_section); + fprintf (file, "\t.align 3\n"); + assemble_name (file, fnname); fputs ("..na:\n", file); + fputs ("\t.ascii \"", file); + assemble_name (file, fnname); + fputs ("\\0\"\n", file); + switch_to_section (text_section); +#endif +#endif /* TARGET_ABI_OPEN_VMS */ +} + +/* Emit the .prologue note at the scheduled end of the prologue. 
*/ + +static void +alpha_output_function_end_prologue (FILE *file) +{ + if (TARGET_ABI_OPEN_VMS) + fputs ("\t.prologue\n", file); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.prologue %d\n", + alpha_function_needs_gp || cfun->is_thunk); +} + +/* Write function epilogue. */ + +void +alpha_expand_epilogue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + int fp_is_frame_pointer, fp_offset; + rtx sa_reg, sa_reg_exp = NULL; + rtx sp_adj1, sp_adj2, mem, reg, insn; + rtx eh_ofs; + rtx cfa_restores = NULL_RTX; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + { + if (alpha_procedure_type == PT_STACK) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = 0; + } + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + fp_is_frame_pointer + = (TARGET_ABI_OPEN_VMS + ? alpha_procedure_type == PT_STACK + : frame_pointer_needed); + fp_offset = 0; + sa_reg = stack_pointer_rtx; + + if (crtl->calls_eh_return) + eh_ofs = EH_RETURN_STACKADJ_RTX; + else + eh_ofs = NULL_RTX; + + if (sa_size) + { + /* If we have a frame pointer, restore SP from it. */ + if (TARGET_ABI_OPEN_VMS + ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM + : frame_pointer_needed) + emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + + /* Cope with very large offsets to the register save area. */ + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low + sa_size <= 0x8000) + bias = reg_offset - low, reg_offset = low; + else + bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 22); + sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); + + emit_move_insn (sa_reg, sa_reg_exp); + } + + /* Restore registers in order, excepting a true frame pointer. */ + + mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset)); + reg = gen_rtx_REG (DImode, REG_RA); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + + reg_offset += 8; + imask &= ~(1UL << REG_RA); + + for (i = 0; i < 31; ++i) + if (imask & (1UL << i)) + { + if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) + fp_offset = reg_offset; + else + { + mem = gen_frame_mem (DImode, + plus_constant (Pmode, sa_reg, + reg_offset)); + reg = gen_rtx_REG (DImode, i); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, + cfa_restores); + } + reg_offset += 8; + } + + for (i = 0; i < 31; ++i) + if (fmask & (1UL << i)) + { + mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg, + reg_offset)); + reg = gen_rtx_REG (DFmode, i+32); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset += 8; + } + } + + if (frame_size || eh_ofs) + { + sp_adj1 = stack_pointer_rtx; + + if (eh_ofs) + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); + } + + /* If the stack size is large, begin computation into a temporary + register so as not to interfere with a potential fp restore, + which must be consecutive with an SP restore. 
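The code that follows picks one of three ways to add the frame size back onto SP, depending on how large the adjustment is; a sketch of that decision, with the thresholds copied from the code below:

/* Illustrative classification only; the strings merely describe the
   strategies used by alpha_expand_epilogue.  */
static const char *
example_sp_restore_strategy (long frame_size, int calls_alloca)
{
  if (frame_size < 32768 && !calls_alloca)
    return "add the constant directly";
  if (frame_size < 0x40007fffL)
    return "split into a register-held high part plus a 16-bit low part";
  return "materialize the full 64-bit constant piecewise, then add";
}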
*/ + if (frame_size < 32768 && !cfun->calls_alloca) + sp_adj2 = GEN_INT (frame_size); + else if (frame_size < 0x40007fffL) + { + int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + + sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); + if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) + sp_adj1 = sa_reg; + else + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, sp_adj2); + } + sp_adj2 = GEN_INT (low); + } + else + { + rtx tmp = gen_rtx_REG (DImode, 23); + sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); + if (!sp_adj2) + { + /* We can't drop new things to memory this late, afaik, + so build it up by pieces. */ + sp_adj2 = alpha_emit_set_long_const (tmp, frame_size, + -(frame_size < 0)); + gcc_assert (sp_adj2); + } + } + + /* From now on, things must be in order. So emit blockages. */ + + /* Restore the frame pointer. */ + if (fp_is_frame_pointer) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, + fp_offset)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + else if (TARGET_ABI_OPEN_VMS) + { + emit_insn (gen_blockage ()); + emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + + /* Restore the stack pointer. */ + emit_insn (gen_blockage ()); + if (sp_adj2 == const0_rtx) + insn = emit_move_insn (stack_pointer_rtx, sp_adj1); + else + insn = emit_move_insn (stack_pointer_rtx, + gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + gcc_assert (cfa_restores == NULL); + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) + { + emit_insn (gen_blockage ()); + insn = emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + +/* Output the rest of the textual info surrounding the epilogue. */ + +void +alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) +{ + rtx insn; + + /* We output a nop after noreturn calls at the very end of the function to + ensure that the return address always remains in the caller's code range, + as not doing so might confuse unwinding engines. */ + insn = get_last_insn (); + if (!INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn && CALL_P (insn)) + output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); + +#if TARGET_ABI_OPEN_VMS + /* Write the linkage entries. */ + alpha_write_linkage (file, fnname); +#endif + + /* End the function. */ + if (TARGET_ABI_OPEN_VMS + || !flag_inhibit_size_directive) + { + fputs ("\t.end ", file); + assemble_name (file, fnname); + putc ('\n', file); + } + inside_function = FALSE; +} + +#if TARGET_ABI_OSF +/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. + + In order to avoid the hordes of differences between generated code + with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating + lots of code loading up large constants, generate rtl and emit it + instead of going straight to text. + + Not sure why this idea hasn't been explored before... 
*/ + +static void +alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + HOST_WIDE_INT hi, lo; + rtx this_rtx, insn, funexp; + + /* We always require a valid GP. */ + emit_insn (gen_prologue_ldgp ()); + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in $16. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 17); + else + this_rtx = gen_rtx_REG (Pmode, 16); + + /* Add DELTA. When possible we use ldah+lda. Otherwise load the + entire constant for the add. */ + lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; + hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == delta) + { + if (hi) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); + if (lo) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); + } + else + { + rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), + delta, -(delta < 0)); + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Add a delta stored in the vtable at VCALL_OFFSET. */ + if (vcall_offset) + { + rtx tmp, tmp2; + + tmp = gen_rtx_REG (Pmode, 0); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; + hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == vcall_offset) + { + if (hi) + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); + } + else + { + tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), + vcall_offset, -(vcall_offset < 0)); + emit_insn (gen_adddi3 (tmp, tmp, tmp2)); + lo = 0; + } + if (lo) + tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); + else + tmp2 = tmp; + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. Note that use_thunk calls + assemble_start_function and assemble_end_function. */ + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); +} +#endif /* TARGET_ABI_OSF */ + +/* Debugging support. */ + +#include "gstab.h" + +/* Name of the file containing the current function. */ + +static const char *current_function_file = ""; + +/* Offsets to alpha virtual arg/local debugging pointers. */ + +long alpha_arg_offset; +long alpha_auto_offset; + +/* Emit a new filename to a stream. 
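The delta handling above reuses the same decomposition at 32-bit scale: lo is a signed 16-bit displacement and hi is the sign-extended, 0x10000-aligned remainder, so hi + lo == delta exactly when the two-add sequence suffices. A host-side check of that test, for illustration:

#include <stdint.h>

/* Mirrors the feasibility test in alpha_output_mi_thunk_osf: nonzero when
   DELTA can be added with the hi/lo pair, zero when the code must build
   the full constant instead.  */
static int
example_delta_fits_two_adds (int64_t delta)
{
  int64_t lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
  int64_t hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;

  return hi + lo == delta;
}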
*/ + +void +alpha_output_filename (FILE *stream, const char *name) +{ + static int first_time = TRUE; + + if (first_time) + { + first_time = FALSE; + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } + + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } +} + +/* Structure to show the current status of registers and memory. */ + +struct shadow_summary +{ + struct { + unsigned int i : 31; /* Mask of int regs */ + unsigned int fp : 31; /* Mask of fp regs */ + unsigned int mem : 1; /* mem == imem | fpmem */ + } used, defd; +}; + +/* Summary the effects of expression X on the machine. Update SUM, a pointer + to the summary structure. SET is nonzero if the insn is setting the + object, otherwise zero. */ + +static void +summarize_insn (rtx x, struct shadow_summary *sum, int set) +{ + const char *format_ptr; + int i, j; + + if (x == 0) + return; + + switch (GET_CODE (x)) + { + /* ??? Note that this case would be incorrect if the Alpha had a + ZERO_EXTRACT in SET_DEST. */ + case SET: + summarize_insn (SET_SRC (x), sum, 0); + summarize_insn (SET_DEST (x), sum, 1); + break; + + case CLOBBER: + summarize_insn (XEXP (x, 0), sum, 1); + break; + + case USE: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case ASM_OPERANDS: + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) + summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + summarize_insn (XVECEXP (x, 0, i), sum, 0); + break; + + case SUBREG: + summarize_insn (SUBREG_REG (x), sum, 0); + break; + + case REG: + { + int regno = REGNO (x); + unsigned long mask = ((unsigned long) 1) << (regno % 32); + + if (regno == 31 || regno == 63) + break; + + if (set) + { + if (regno < 32) + sum->defd.i |= mask; + else + sum->defd.fp |= mask; + } + else + { + if (regno < 32) + sum->used.i |= mask; + else + sum->used.fp |= mask; + } + } + break; + + case MEM: + if (set) + sum->defd.mem = 1; + else + sum->used.mem = 1; + + /* Find the regs used in memory address computation: */ + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + case SCRATCH: case ASM_INPUT: + break; + + /* Handle common unary and binary ops for efficiency. 
*/ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + summarize_insn (XEXP (x, 0), sum, 0); + summarize_insn (XEXP (x, 1), sum, 0); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case 'e': + summarize_insn (XEXP (x, i), sum, 0); + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + summarize_insn (XVECEXP (x, i, j), sum, 0); + break; + + case 'i': + break; + + default: + gcc_unreachable (); + } + } +} + +/* Ensure a sufficient number of `trapb' insns are in the code when + the user requests code with a trap precision of functions or + instructions. + + In naive mode, when the user requests a trap-precision of + "instruction", a trapb is needed after every instruction that may + generate a trap. This ensures that the code is resumption safe but + it is also slow. + + When optimizations are turned on, we delay issuing a trapb as long + as possible. In this context, a trap shadow is the sequence of + instructions that starts with a (potentially) trap generating + instruction and extends to the next trapb or call_pal instruction + (but GCC never generates call_pal by itself). We can delay (and + therefore sometimes omit) a trapb subject to the following + conditions: + + (a) On entry to the trap shadow, if any Alpha register or memory + location contains a value that is used as an operand value by some + instruction in the trap shadow (live on entry), then no instruction + in the trap shadow may modify the register or memory location. + + (b) Within the trap shadow, the computation of the base register + for a memory load or store instruction may not involve using the + result of an instruction that might generate an UNPREDICTABLE + result. + + (c) Within the trap shadow, no register may be used more than once + as a destination register. (This is to make life easier for the + trap-handler.) + + (d) The trap shadow may not include any branch instructions. 
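A simplified model of how conditions (a) through (c) are enforced by alpha_handle_trap_shadows below; it is a sketch only, since the real code also folds each insn's summary into the running shadow and closes the shadow outright at branches, calls, and labels, which covers (d):

/* Bit masks of integer regs, FP regs, and a memory flag, in the spirit
   of struct shadow_summary above.  */
struct summary_bits { unsigned int i, fp, mem; };

/* Nonzero when the incoming insn's definitions force the trap shadow to
   be closed with a trapb.  */
static int
example_must_close_shadow (const struct summary_bits *shadow_used,
                           const struct summary_bits *shadow_defd,
                           const struct summary_bits *insn_defd)
{
  /* (c): no destination may be reused within the shadow.  */
  if ((insn_defd->i & shadow_defd->i) || (insn_defd->fp & shadow_defd->fp))
    return 1;

  /* (a) and (b): nothing the shadow has already read may be clobbered.  */
  if ((insn_defd->i & shadow_used->i)
      || (insn_defd->fp & shadow_used->fp)
      || (insn_defd->mem & shadow_used->mem))
    return 1;

  return 0;
}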
*/ + +static void +alpha_handle_trap_shadows (void) +{ + struct shadow_summary shadow; + int trap_pending, exception_nesting; + rtx i, n; + + trap_pending = 0; + exception_nesting = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + + for (i = get_insns (); i ; i = NEXT_INSN (i)) + { + if (NOTE_P (i)) + { + switch (NOTE_KIND (i)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EPILOGUE_BEG: + if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) + goto close_shadow; + break; + } + } + else if (trap_pending) + { + if (alpha_tp == ALPHA_TP_FUNC) + { + if (JUMP_P (i) + && GET_CODE (PATTERN (i)) == RETURN) + goto close_shadow; + } + else if (alpha_tp == ALPHA_TP_INSN) + { + if (optimize > 0) + { + struct shadow_summary sum; + + sum.used.i = 0; + sum.used.fp = 0; + sum.used.mem = 0; + sum.defd = sum.used; + + switch (GET_CODE (i)) + { + case INSN: + /* Annoyingly, get_attr_trap will die on these. */ + if (GET_CODE (PATTERN (i)) == USE + || GET_CODE (PATTERN (i)) == CLOBBER) + break; + + summarize_insn (PATTERN (i), &sum, 0); + + if ((sum.defd.i & shadow.defd.i) + || (sum.defd.fp & shadow.defd.fp)) + { + /* (c) would be violated */ + goto close_shadow; + } + + /* Combine shadow with summary of current insn: */ + shadow.used.i |= sum.used.i; + shadow.used.fp |= sum.used.fp; + shadow.used.mem |= sum.used.mem; + shadow.defd.i |= sum.defd.i; + shadow.defd.fp |= sum.defd.fp; + shadow.defd.mem |= sum.defd.mem; + + if ((sum.defd.i & shadow.used.i) + || (sum.defd.fp & shadow.used.fp) + || (sum.defd.mem & shadow.used.mem)) + { + /* (a) would be violated (also takes care of (b)) */ + gcc_assert (get_attr_trap (i) != TRAP_YES + || (!(sum.defd.i & sum.used.i) + && !(sum.defd.fp & sum.used.fp))); + + goto close_shadow; + } + break; + + case JUMP_INSN: + case CALL_INSN: + case CODE_LABEL: + goto close_shadow; + + default: + gcc_unreachable (); + } + } + else + { + close_shadow: + n = emit_insn_before (gen_trapb (), i); + PUT_MODE (n, TImode); + PUT_MODE (i, TImode); + trap_pending = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + } + } + } + + if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) + && NONJUMP_INSN_P (i) + && GET_CODE (PATTERN (i)) != USE + && GET_CODE (PATTERN (i)) != CLOBBER + && get_attr_trap (i) == TRAP_YES) + { + if (optimize && !trap_pending) + summarize_insn (PATTERN (i), &shadow, 0); + trap_pending = 1; + } + } +} + +/* Alpha can only issue instruction groups simultaneously if they are + suitably aligned. This is very processor-specific. */ +/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe + that are marked "fake". These instructions do not exist on that target, + but it is possible to see these insns with deranged combinations of + command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, + choose a result at random. 
*/ + +enum alphaev4_pipe { + EV4_STOP = 0, + EV4_IB0 = 1, + EV4_IB1 = 2, + EV4_IBX = 4 +}; + +enum alphaev5_pipe { + EV5_STOP = 0, + EV5_NONE = 1, + EV5_E01 = 2, + EV5_E0 = 4, + EV5_E1 = 8, + EV5_FAM = 16, + EV5_FA = 32, + EV5_FM = 64 +}; + +static enum alphaev4_pipe +alphaev4_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV4_STOP; + if (get_attr_length (insn) != 4) + return EV4_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_LDSYM: + case TYPE_FLD: + case TYPE_LD_L: + return EV4_IBX; + + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_FBR: + case TYPE_MVI: /* fake */ + return EV4_IB0; + + case TYPE_IST: + case TYPE_MISC: + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + case TYPE_FCPYS: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FMUL: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FSQRT: /* fake */ + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV4_IB1; + + default: + gcc_unreachable (); + } +} + +static enum alphaev5_pipe +alphaev5_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV5_STOP; + if (get_attr_length (insn) != 4) + return EV5_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_FLD: + case TYPE_LDSYM: + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + return EV5_E01; + + case TYPE_IST: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_MISC: + case TYPE_MVI: + case TYPE_LD_L: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV5_E0; + + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + return EV5_E1; + + case TYPE_FCPYS: + return EV5_FAM; + + case TYPE_FBR: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FSQRT: /* fake */ + return EV5_FA; + + case TYPE_FMUL: + return EV5_FM; + + default: + gcc_unreachable (); + } +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then + the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev4_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev4_pipe pipe; + + pipe = alphaev4_insn_pipe (insn); + switch (pipe) + { + case EV4_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + case EV4_IBX: + if (in_use & EV4_IB0) + { + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + } + else + in_use |= EV4_IB0 | EV4_IBX; + break; + + case EV4_IB0: + if (in_use & EV4_IB0) + { + if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) + goto done; + in_use |= EV4_IB1; + } + in_use |= EV4_IB0; + break; + + case EV4_IB1: + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. 
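The slot bookkeeping in alphaev4_next_group above amounts to a small state machine over the IN_USE mask; here is a sketch of just that transition, not the scheduler itself:

enum { IB0 = 1, IB1 = 2, IBX = 4 };   /* mirrors alphaev4_pipe */

/* Try to add an insn of class PIPE to a group whose slots are IN_USE.
   Returns the new mask, or -1 when the insn must start a new group.  An
   IBX-class insn claims IB0 first but records, via the IBX bit, that the
   hardware may bump it into IB1 to make room for a later IB0 insn.  */
static int
example_ev4_add_to_group (int in_use, int pipe)
{
  switch (pipe)
    {
    case IBX:
      if (in_use & IB0)
        return (in_use & IB1) ? -1 : (in_use | IB1);
      return in_use | IB0 | IBX;

    case IB0:
      if (in_use & IB0)
        {
          if (!(in_use & IBX) || (in_use & IB1))
            return -1;
          return in_use | IB1;          /* the earlier IBX insn moves over */
        }
      return in_use | IB0;

    case IB1:
      return (in_use & IB1) ? -1 : (in_use | IB1);

    default:
      return -1;                        /* complex insns end the group */
    }
}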
*/ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. */ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then + the insn in EV5_E0 can be swapped by the hardware into EV5_E1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev5_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev5_pipe pipe; + + pipe = alphaev5_insn_pipe (insn); + switch (pipe) + { + case EV5_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + /* ??? Most of the places below, we would like to assert never + happen, as it would indicate an error either in Haifa, or + in the scheduling description. Unfortunately, Haifa never + schedules the last instruction of the BB, so we don't have + an accurate TI bit to go off. */ + case EV5_E01: + if (in_use & EV5_E0) + { + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + } + else + in_use |= EV5_E0 | EV5_E01; + break; + + case EV5_E0: + if (in_use & EV5_E0) + { + if (!(in_use & EV5_E01) || (in_use & EV5_E1)) + goto done; + in_use |= EV5_E1; + } + in_use |= EV5_E0; + break; + + case EV5_E1: + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + break; + + case EV5_FAM: + if (in_use & EV5_FA) + { + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + } + else + in_use |= EV5_FA | EV5_FAM; + break; + + case EV5_FA: + if (in_use & EV5_FA) + goto done; + in_use |= EV5_FA; + break; + + case EV5_FM: + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + break; + + case EV5_NONE: + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. */ + /* ??? If this is predicted not-taken, slotting continues, except + that no more IBR, FBR, or JSR insns may be slotted. */ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. 
*/ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +static rtx +alphaev4_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV4_IB0)) + { + in_use |= EV4_IB0; + nop = gen_nop (); + } + else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) + { + in_use |= EV4_IB1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV4_IB1)) + { + in_use |= EV4_IB1; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +static rtx +alphaev5_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV5_E1)) + { + in_use |= EV5_E1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV5_FA)) + { + in_use |= EV5_FA; + nop = gen_fnop (); + } + else if (TARGET_FP && !(in_use & EV5_FM)) + { + in_use |= EV5_FM; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +/* The instruction group alignment main loop. */ + +static void +alpha_align_insns (unsigned int max_align, + rtx (*next_group) (rtx, int *, int *), + rtx (*next_nop) (int *)) +{ + /* ALIGN is the known alignment for the insn group. */ + unsigned int align; + /* OFS is the offset of the current insn in the insn group. */ + int ofs; + int prev_in_use, in_use, len, ldgp; + rtx i, next; + + /* Let shorten branches care for assigning alignments to code labels. */ + shorten_branches (get_insns ()); + + if (align_functions < 4) + align = 4; + else if ((unsigned int) align_functions < max_align) + align = align_functions; + else + align = max_align; + + ofs = prev_in_use = 0; + i = get_insns (); + if (NOTE_P (i)) + i = next_nonnote_insn (i); + + ldgp = alpha_function_needs_gp ? 8 : 0; + + while (i) + { + next = (*next_group) (i, &in_use, &len); + + /* When we see a label, resync alignment etc. */ + if (LABEL_P (i)) + { + unsigned int new_align = 1 << label_to_alignment (i); + + if (new_align >= align) + { + align = new_align < max_align ? new_align : max_align; + ofs = 0; + } + + else if (ofs & (new_align-1)) + ofs = (ofs | (new_align-1)) + 1; + gcc_assert (!len); + } + + /* Handle complex instructions special. */ + else if (in_use == 0) + { + /* Asms will have length < 0. This is a signal that we have + lost alignment knowledge. Assume, however, that the asm + will not mis-align instructions. */ + if (len < 0) + { + ofs = 0; + align = 4; + len = 0; + } + } + + /* If the known alignment is smaller than the recognized insn group, + realign the output. */ + else if ((int) align < len) + { + unsigned int new_log_align = len > 8 ? 4 : 3; + rtx prev, where; + + where = prev = prev_nonnote_insn (i); + if (!where || !LABEL_P (where)) + where = i; + + /* Can't realign between a call and its gp reload. */ + if (! (TARGET_EXPLICIT_RELOCS + && prev && CALL_P (prev))) + { + emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); + align = 1 << new_log_align; + ofs = 0; + } + } + + /* We may not insert padding inside the initial ldgp sequence. */ + else if (ldgp > 0) + ldgp -= len; + + /* If the group won't fit in the same INT16 as the previous, + we need to add padding to keep the group together. Rather + than simply leaving the insn filling to the assembler, we + can make use of the knowledge of what sorts of instructions + were issued in the previous group to make sure that all of + the added nops are really free. 
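+ + For instance (purely illustrative), if the previous EV5 group left only the E1 and FA slots unused, the first two pads requested from the next_nop callback below come back as an integer nop for E1 and an fnop for FA, so the padding can dual-issue with work already in that group rather than costing an extra cycle.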
*/ + else if (ofs + len > (int) align) + { + int nop_count = (align - ofs) / 4; + rtx where; + + /* Insert nops before labels, branches, and calls to truly merge + the execution of the nops with the previous instruction group. */ + where = prev_nonnote_insn (i); + if (where) + { + if (LABEL_P (where)) + { + rtx where2 = prev_nonnote_insn (where); + if (where2 && JUMP_P (where2)) + where = where2; + } + else if (NONJUMP_INSN_P (where)) + where = i; + } + else + where = i; + + do + emit_insn_before ((*next_nop)(&prev_in_use), where); + while (--nop_count); + ofs = 0; + } + + ofs = (ofs + len) & (align - 1); + prev_in_use = in_use; + i = next; + } +} + +/* Insert an unop between sibcall or noreturn function call and GP load. */ + +static void +alpha_pad_function_end (void) +{ + rtx insn, next; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!CALL_P (insn) + || !(SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))) + continue; + + /* Make sure we do not split a call and its corresponding + CALL_ARG_LOCATION note. */ + next = NEXT_INSN (insn); + if (next == NULL) + continue; + if (BARRIER_P (next)) + { + next = NEXT_INSN (next); + if (next == NULL) + continue; + } + if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) + insn = next; + + next = next_active_insn (insn); + if (next) + { + rtx pat = PATTERN (next); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE + && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) + emit_insn_after (gen_unop (), insn); + } + } +} + +/* Machine dependent reorg pass. */ + +static void +alpha_reorg (void) +{ + /* Workaround for a linker error that triggers when an exception + handler immediately follows a sibcall or a noreturn function. + +In the sibcall case: + + The instruction stream from an object file: + + 1d8: 00 00 fb 6b jmp (t12) + 1dc: 00 00 ba 27 ldah gp,0(ra) + 1e0: 00 00 bd 23 lda gp,0(gp) + 1e4: 00 00 7d a7 ldq t12,0(gp) + 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec> + + was converted in the final link pass to: + + 12003aa88: 67 fa ff c3 br 120039428 <...> + 12003aa8c: 00 00 fe 2f unop + 12003aa90: 00 00 fe 2f unop + 12003aa94: 48 83 7d a7 ldq t12,-31928(gp) + 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec> + +And in the noreturn case: + + The instruction stream from an object file: + + 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58> + 58: 00 00 ba 27 ldah gp,0(ra) + 5c: 00 00 bd 23 lda gp,0(gp) + 60: 00 00 7d a7 ldq t12,0(gp) + 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68> + + was converted in the final link pass to: + + fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> + fdb28: 00 00 fe 2f unop + fdb2c: 00 00 fe 2f unop + fdb30: 30 82 7d a7 ldq t12,-32208(gp) + fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68> + + GP load instructions were wrongly cleared by the linker relaxation + pass. This workaround prevents removal of GP loads by inserting + an unop instruction between a sibcall or noreturn function call and + exception handler prologue. */ + + if (current_function_has_exception_handlers ()) + alpha_pad_function_end (); + + if (alpha_tp != ALPHA_TP_PROG || flag_exceptions) + alpha_handle_trap_shadows (); + + /* Due to the number of extra trapb insns, don't bother fixing up + alignment when trap precision is instruction. Moreover, we can + only do our job when sched2 is run.
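+ + (The alignment argument passed below reflects the issue width being tuned for: two instructions, 8 bytes, on EV4 and four instructions, 16 bytes, on EV5.)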
*/ + if (optimize && !optimize_size + && alpha_tp != ALPHA_TP_INSN + && flag_schedule_insns_after_reload) + { + if (alpha_tune == PROCESSOR_EV4) + alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop); + else if (alpha_tune == PROCESSOR_EV5) + alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop); + } +} + +static void +alpha_file_start (void) +{ + default_file_start (); + + fputs ("\t.set noreorder\n", asm_out_file); + fputs ("\t.set volatile\n", asm_out_file); + if (TARGET_ABI_OSF) + fputs ("\t.set noat\n", asm_out_file); + if (TARGET_EXPLICIT_RELOCS) + fputs ("\t.set nomacro\n", asm_out_file); + if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) + { + const char *arch; + + if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) + arch = "ev6"; + else if (TARGET_MAX) + arch = "pca56"; + else if (TARGET_BWX) + arch = "ev56"; + else if (alpha_cpu == PROCESSOR_EV5) + arch = "ev5"; + else + arch = "ev4"; + + fprintf (asm_out_file, "\t.arch %s\n", arch); + } +} + +/* Since we don't have a .dynbss section, we should not allow global + relocations in the .rodata section. */ + +static int +alpha_elf_reloc_rw_mask (void) +{ + return flag_pic ? 3 : 2; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. */ + +static section * +alpha_elf_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) + /* ??? Consider using mergeable sdata sections. */ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static unsigned int +alpha_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 + || strncmp (name, ".sdata.", 7) == 0 + || strncmp (name, ".gnu.linkonce.s.", 16) == 0 + || strcmp (name, ".sbss") == 0 + || strncmp (name, ".sbss.", 6) == 0 + || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; +} + +/* Structure to collect function names for final output in link section. */ +/* Note that items marked with GTY can't be ifdef'ed out. */ + +enum reloc_kind +{ + KIND_LINKAGE, + KIND_CODEADDR +}; + +struct GTY(()) alpha_links +{ + rtx func; + rtx linkage; + enum reloc_kind rkind; +}; + +#if TARGET_ABI_OPEN_VMS + +/* Return the VMS argument type corresponding to MODE. */ + +enum avms_arg_type +alpha_arg_type (enum machine_mode mode) +{ + switch (mode) + { + case SFmode: + return TARGET_FLOAT_VAX ? FF : FS; + case DFmode: + return TARGET_FLOAT_VAX ? FD : FT; + default: + return I64; + } +} + +/* Return an rtx for an integer representing the VMS Argument Information + register value. */ + +rtx +alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) +{ + unsigned HOST_WIDE_INT regval = cum.num_args; + int i; + + for (i = 0; i < 6; i++) + regval |= ((int) cum.atypes[i]) << (i * 3 + 8); + + return GEN_INT (regval); +} + + +/* Return a SYMBOL_REF representing the reference to the .linkage entry + of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if + this is the reference to the linkage pointer value, 0 if this is the + reference to the function entry value. RFLAG is 1 if this a reduced + reference (code address only), 0 if this is a full reference. 
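+ + For instance (with a hypothetical function FOO), a reference made from function number 3 uses the linkage symbol "$3..FOO..lk" constructed below; with LFLAG set the result is a memory reference to the second quadword of that linkage pair (offset 8), otherwise the linkage symbol itself is returned.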
*/ + +rtx +alpha_use_linkage (rtx func, bool lflag, bool rflag) +{ + struct alpha_links *al = NULL; + const char *name = XSTR (func, 0); + + if (cfun->machine->links) + { + splay_tree_node lnode; + + /* Is this name already defined? */ + lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name); + if (lnode) + al = (struct alpha_links *) lnode->value; + } + else + cfun->machine->links = splay_tree_new_ggc + ((splay_tree_compare_fn) strcmp, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_s, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s); + + if (al == NULL) + { + size_t buf_len; + char *linksym; + tree id; + + if (name[0] == '*') + name++; + + /* Follow transparent alias, as this is used for CRTL translations. */ + id = maybe_get_identifier (name); + if (id) + { + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + name = IDENTIFIER_POINTER (id); + } + + buf_len = strlen (name) + 8 + 9; + linksym = (char *) alloca (buf_len); + snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name); + + al = ggc_alloc_alpha_links (); + al->func = func; + al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym)); + + splay_tree_insert (cfun->machine->links, + (splay_tree_key) ggc_strdup (name), + (splay_tree_value) al); + } + + al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE; + + if (lflag) + return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8)); + else + return al->linkage; +} + +static int +alpha_write_one_linkage (splay_tree_node node, void *data) +{ + const char *const name = (const char *) node->key; + struct alpha_links *link = (struct alpha_links *) node->value; + FILE *stream = (FILE *) data; + + ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0)); + if (link->rkind == KIND_CODEADDR) + { + /* External and used, request code address. */ + fprintf (stream, "\t.code_address "); + } + else + { + if (!SYMBOL_REF_EXTERNAL_P (link->func) + && SYMBOL_REF_LOCAL_P (link->func)) + { + /* Locally defined, build linkage pair. */ + fprintf (stream, "\t.quad %s..en\n", name); + fprintf (stream, "\t.quad "); + } + else + { + /* External, request linkage pair. */ + fprintf (stream, "\t.linkage "); + } + } + assemble_name (stream, name); + fputs ("\n", stream); + + return 0; +} + +static void +alpha_write_linkage (FILE *stream, const char *funname) +{ + fprintf (stream, "\t.link\n"); + fprintf (stream, "\t.align 3\n"); + in_section = NULL; + +#ifdef TARGET_VMS_CRASH_DEBUG + fputs ("\t.name ", stream); + assemble_name (stream, funname); + fputs ("..na\n", stream); +#endif + + ASM_OUTPUT_LABEL (stream, funname); + fprintf (stream, "\t.pdesc "); + assemble_name (stream, funname); + fprintf (stream, "..en,%s\n", + alpha_procedure_type == PT_STACK ? "stack" + : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); + + if (cfun->machine->links) + { + splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream); + /* splay_tree_delete (func->links); */ + } +} + +/* Switch to an arbitrary section NAME with attributes as specified + by FLAGS. ALIGN specifies any known alignment requirements for + the section; 0 if the default should be used. */ + +static void +vms_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + fputc ('\n', asm_out_file); + fprintf (asm_out_file, ".section\t%s", name); + + if (flags & SECTION_DEBUG) + fprintf (asm_out_file, ",NOWRT"); + + fputc ('\n', asm_out_file); +} + +/* Record an element in the table of global constructors. 
SYMBOL is + a SYMBOL_REF of the function to be called; PRIORITY is a number + between 0 and MAX_INIT_PRIORITY. + + Differs from default_ctors_section_asm_out_constructor in that the + width of the .ctors entry is always 64 bits, rather than the 32 bits + used by a normal pointer. */ + +static void +vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (ctors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} + +static void +vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (dtors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} +#else +rtx +alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, + bool lflag ATTRIBUTE_UNUSED, + bool rflag ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +#endif /* TARGET_ABI_OPEN_VMS */ + +static void +alpha_init_libfuncs (void) +{ + if (TARGET_ABI_OPEN_VMS) + { + /* Use the VMS runtime library functions for division and + remainder. */ + set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); + set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); + set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); + set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); + set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); + set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); + set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); + set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); + abort_libfunc = init_one_libfunc ("decc$abort"); + memcmp_libfunc = init_one_libfunc ("decc$memcmp"); +#ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; +#endif + } +} + +/* On the Alpha, we use this to disable the floating-point registers + when they don't exist. */ + +static void +alpha_conditional_register_usage (void) +{ + int i; + if (! TARGET_FPREGS) + for (i = 32; i < 63; i++) + fixed_regs[i] = call_used_regs[i] = 1; +} + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +static void +alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == GE || *code == GT || *code == GEU || *code == GTU) + && (REG_P (*op1) || *op1 == const0_rtx)) + { + rtx tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } + + if ((*code == LT || *code == LTU) + && CONST_INT_P (*op1) && INTVAL (*op1) == 256) + { + *code = *code == LT ? LE : LEU; + *op1 = GEN_INT (255); + } +} + +/* Initialize the GCC target structure. */ +#if TARGET_ABI_OPEN_VMS +# undef TARGET_ATTRIBUTE_TABLE +# define TARGET_ATTRIBUTE_TABLE vms_attribute_table +# undef TARGET_CAN_ELIMINATE +# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate +#endif + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* Default unaligned ops are provided for ELF systems. To get unaligned + data for non-ELF systems, we have to turn off auto alignment. 
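+ The VMS definitions below do this by prefixing each directive with ".align 0", so the assembler does not realign the unaligned datum.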
*/ +#if TARGET_ABI_OPEN_VMS +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t" +#endif + +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START alpha_file_start + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + alpha_multipass_dfa_lookahead + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL alpha_builtin_decl +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS alpha_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN alpha_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN alpha_fold_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem + +#if TARGET_ABI_OSF +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true +#undef TARGET_STDARG_OPTIMIZE_HOOK +#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook +#endif + +/* Use 16-bits anchor. 
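+ That is, keep section anchor offsets within the signed 16-bit displacement range of the memory-format instructions, -0x8000 through 0x7fff, as the two definitions below specify.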
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 0x7fff +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS alpha_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG alpha_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD alpha_secondary_reload + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start + +/* The Alpha architecture does not require sequential consistency. See + http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html + for an example of how it can be violated in practice. 
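+ + In particular, even data-dependent loads may be satisfied out of order: a reader doing "p = shared; v = *p;" can observe a stale *p unless a memory barrier separates the two loads.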
*/ +#undef TARGET_RELAXED_ORDERING +#define TARGET_RELAXED_ORDERING true + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE alpha_option_override + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE alpha_mangle_type +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison + +struct gcc_target targetm = TARGET_INITIALIZER; + + +#include "gt-alpha.h" diff --git a/gcc-4.9/gcc/config/alpha/alpha.h b/gcc-4.9/gcc/config/alpha/alpha.h new file mode 100644 index 000000000..0ff793f14 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.h @@ -0,0 +1,1074 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha. + Copyright (C) 1992-2014 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__alpha"); \ + builtin_define ("__alpha__"); \ + builtin_assert ("cpu=alpha"); \ + builtin_assert ("machine=alpha"); \ + if (TARGET_CIX) \ + { \ + builtin_define ("__alpha_cix__"); \ + builtin_assert ("cpu=cix"); \ + } \ + if (TARGET_FIX) \ + { \ + builtin_define ("__alpha_fix__"); \ + builtin_assert ("cpu=fix"); \ + } \ + if (TARGET_BWX) \ + { \ + builtin_define ("__alpha_bwx__"); \ + builtin_assert ("cpu=bwx"); \ + } \ + if (TARGET_MAX) \ + { \ + builtin_define ("__alpha_max__"); \ + builtin_assert ("cpu=max"); \ + } \ + if (alpha_cpu == PROCESSOR_EV6) \ + { \ + builtin_define ("__alpha_ev6__"); \ + builtin_assert ("cpu=ev6"); \ + } \ + else if (alpha_cpu == PROCESSOR_EV5) \ + { \ + builtin_define ("__alpha_ev5__"); \ + builtin_assert ("cpu=ev5"); \ + } \ + else /* Presumably ev4. */ \ + { \ + builtin_define ("__alpha_ev4__"); \ + builtin_assert ("cpu=ev4"); \ + } \ + if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP"); \ + if (TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP_INEXACT"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + \ + /* Macros dependent on the C dialect. 
*/ \ + SUBTARGET_LANGUAGE_CPP_BUILTINS(); \ +} while (0) + +#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ + do \ + { \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else if (c_dialect_cxx ()) \ + { \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ + } \ + else \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_dialect_objc ()) \ + { \ + builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ + builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ + } \ + } \ + while (0) +#endif + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to alpha.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_EV4, /* 2106[46]{a,} */ + PROCESSOR_EV5, /* 21164{a,pc,} */ + PROCESSOR_EV6, /* 21264 */ + PROCESSOR_MAX +}; + +extern enum processor_type alpha_cpu; +extern enum processor_type alpha_tune; + +enum alpha_trap_precision +{ + ALPHA_TP_PROG, /* No precision (default). */ + ALPHA_TP_FUNC, /* Trap contained within originating function. */ + ALPHA_TP_INSN /* Instruction accuracy and code is resumption safe. */ +}; + +enum alpha_fp_rounding_mode +{ + ALPHA_FPRM_NORM, /* Normal rounding mode. */ + ALPHA_FPRM_MINF, /* Round towards minus-infinity. */ + ALPHA_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ + ALPHA_FPRM_DYN /* Dynamic rounding mode. */ +}; + +enum alpha_fp_trap_mode +{ + ALPHA_FPTM_N, /* Normal trap mode. */ + ALPHA_FPTM_U, /* Underflow traps enabled. */ + ALPHA_FPTM_SU, /* Software completion, w/underflow traps */ + ALPHA_FPTM_SUI /* Software completion, w/underflow & inexact traps */ +}; + +extern enum alpha_trap_precision alpha_tp; +extern enum alpha_fp_rounding_mode alpha_fprm; +extern enum alpha_fp_trap_mode alpha_fptm; + +/* Invert the easy way to make options work. */ +#define TARGET_FP (!TARGET_SOFT_FP) + +/* These are for target os support and cannot be changed at runtime. */ +#define TARGET_ABI_OPEN_VMS 0 +#define TARGET_ABI_OSF (!TARGET_ABI_OPEN_VMS) + +#ifndef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 0 +#endif +#ifndef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 +#endif +#ifndef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 0 +#endif +#ifndef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 0 +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#define TARGET_DEFAULT MASK_FPREGS + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS +#ifdef HAVE_AS_EXPLICIT_RELOCS +#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS +#define TARGET_SUPPORT_ARCH 1 +#else +#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 +#endif +#endif + +#ifndef TARGET_SUPPORT_ARCH +#define TARGET_SUPPORT_ARCH 0 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. + --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + + +/* target machine storage layout */ + +/* Define the size of `int'. The default is the same as the word size. */ +#define INT_TYPE_SIZE 32 + +/* Define the size of `long long'. The default is the twice the word size. 
*/ +#define LONG_LONG_TYPE_SIZE 64 + +/* The two floating-point formats we support are S-floating, which is + 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' + and `long double' are T. */ + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. */ +#ifdef __LONG_DOUBLE_128__ +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +/* Work around target_flags dependency in ada/targtyps.c. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + For Alpha, we always store objects in a full register. 32-bit integers + are always sign-extended, but smaller objects retain their signedness. + + Note that small vector types can get mapped onto integer modes at the + whim of not appearing in alpha-modes.def. We never promoted these + values before; don't do so now that we've trimmed the set of modes to + those actually implemented in the backend. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = DImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. + + There are no such instructions on the Alpha, but the documentation + is little endian. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + This is false on the Alpha. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For Alpha we can decide arbitrarily since there are no machine instructions + for them. Might as well be consistent with bytes. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 8 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE 64 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 64 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 128 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 64 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#undef PCC_BITFILED_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* For atomic access to objects, must have at least 32-bit alignment + unless the machine has byte operations. */ +#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 
8 : 32)) + +/* Align all constants and variables to at least a word boundary so + we can pick up pieces of them faster. */ +/* ??? Only if block-move stuff knows about different source/destination + alignment. */ +#if 0 +#define CONSTANT_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#endif + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. + + Since we get an error message when we do one, call them invalid. */ + +#define STRICT_ALIGNMENT 1 + +/* Set this nonzero if unaligned move instructions are extremely slow. + + On the Alpha, they trap. */ + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + We define all 32 integer registers, even though $31 is always zero, + and all 32 floating-point registers, even though $f31 is also + always zero. We do not bother defining the FP status register and + there are no other registers. + + Since $31 is always zero, we will use register number 31 as the + argument pointer. It will never appear in the generated code + because we will always be eliminating it in favor of the stack + pointer or hardware frame pointer. + + Likewise, we use $f31 for the frame pointer, which will always + be eliminated in favor of the hardware frame pointer or the + stack pointer. */ + +#define FIRST_PSEUDO_REGISTER 64 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ + +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. 
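+ + As the inline comments below indicate, call-clobbered ("nonsaved") registers are handed out first, so values that do not live across calls stay out of the call-saved registers and avoid extra prologue and epilogue saves.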
*/ + +#define REG_ALLOC_ORDER { \ + 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ + 22, 23, 24, 25, 28, /* likewise */ \ + 0, /* likewise, but return value */ \ + 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ + 27, /* likewise, but OSF procedure value */ \ + \ + 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ + 54, 55, 56, 57, 58, 59, /* likewise */ \ + 60, 61, 62, /* likewise */ \ + 32, 33, /* likewise, but return values */ \ + 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ + \ + 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ + 26, /* return address */ \ + 15, /* hard frame pointer */ \ + \ + 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ + 40, 41, /* likewise */ \ + \ + 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ +} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + On Alpha, the integer registers can hold any mode. The floating-point + registers can hold 64-bit integers as well, but not smaller values. */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (IN_RANGE ((REGNO), 32, 62) \ + ? (MODE) == SFmode || (MODE) == DFmode || (MODE) == DImode \ + || (MODE) == SCmode || (MODE) == DCmode \ + : 1) + +/* A C expression that is nonzero if a value of mode + MODE1 is accessible in mode MODE2 without copying. + + This asymmetric test is true when MODE1 could be put + in an FP register but MODE2 could not. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (HARD_REGNO_MODE_OK (32, (MODE1)) \ + ? HARD_REGNO_MODE_OK (32, (MODE2)) \ + : 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Alpha pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 15 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 31 + +/* Base register for access to local variables of function. */ +#define FRAME_POINTER_REGNUM 63 + +/* Register in which static-chain is passed to a function. + + For the Alpha, this is based on an example; the calling sequence + doesn't seem to specify this. */ +#define STATIC_CHAIN_REGNUM 1 + +/* The register number of the register used to address a table of + static data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM 29 + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' + is clobbered by calls. */ +/* ??? It is and it isn't. It's required to be valid for a given + function when the function returns. It isn't clobbered by + current_file functions. Moreover, we do not expose the ldgp + until after reload, so we're probably safe. */ +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. 
+ + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class { + NO_REGS, R0_REG, R24_REG, R25_REG, R27_REG, + GENERAL_REGS, FLOAT_REGS, ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + {"NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", \ + "GENERAL_REGS", "FLOAT_REGS", "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ {0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000001, 0x00000000}, /* R0_REG */ \ + {0x01000000, 0x00000000}, /* R24_REG */ \ + {0x02000000, 0x00000000}, /* R25_REG */ \ + {0x08000000, 0x00000000}, /* R27_REG */ \ + {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ + {0xffffffff, 0xffffffff} } + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 ? R0_REG \ + : (REGNO) == 24 ? R24_REG \ + : (REGNO) == 25 ? R25_REG \ + : (REGNO) == 27 ? R27_REG \ + : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS \ + : GENERAL_REGS) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS NO_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS alpha_preferred_reload_class + +/* If we are copying between general and FP registers, we need a memory + location unless the FIX extension is available. */ + +#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ + (! TARGET_FIX && (((CLASS1) == FLOAT_REGS && (CLASS2) != FLOAT_REGS) \ + || ((CLASS2) == FLOAT_REGS && (CLASS1) != FLOAT_REGS))) + +/* Specify the mode to be used for memory when a secondary memory + location is needed. If MODE is floating-point, use it. Otherwise, + widen to a word like the default. This is needed because we always + store integers in FP registers in quadword format. This whole + area is very tricky! */ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + (GET_MODE_CLASS (MODE) == MODE_FLOAT ? (MODE) \ + : GET_MODE_SIZE (MODE) >= 4 ? (MODE) \ + : mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0)) + +/* Return the class of registers that cannot change mode from FROM to TO. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0) + +/* Define the cost of moving between registers of various classes. Moving + between FLOAT_REGS and anything else except float regs is expensive. 
+ In fact, we make it quite expensive because we really don't want to + do these moves unless it is clearly worth it. Optimizations may + reduce the impact of not being able to allocate a pseudo to a + hard register. */ + +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 \ + : TARGET_FIX ? ((CLASS1) == FLOAT_REGS ? 6 : 8) \ + : 4+2*alpha_memory_latency) + +/* A C expression returning the cost of moving data of MODE from a register to + or from memory. + + On the Alpha, bump this up a bit. */ + +extern int alpha_memory_latency; +#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency) + +/* Provide the cost of a branch. Exact meaning under development. */ +#define BRANCH_COST(speed_p, predictable_p) 5 + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +/* #define FRAME_GROWS_DOWNWARD 0 */ + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ + +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On Alpha, don't define this because there are no push insns. */ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Define this to be nonzero if stack checking is built into the ABI. */ +#define STACK_CHECK_BUILTIN 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the Alpha. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} + +/* Round up to a multiple of 16 bytes. */ +#define ALPHA_ROUND(X) (((X) + 15) & ~ 15) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_initial_elimination_offset(FROM, TO)) + +/* Define this if stack space is still allocated for a parameter passed + in a register. */ +/* #define REG_PARM_STACK_SPACE */ + +/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + function_value (VALTYPE, FUNC, VOIDmode) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + function_value (NULL, NULL, MODE) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ + +#define FUNCTION_VALUE_REGNO_P(N) \ + ((N) == 0 || (N) == 1 || (N) == 32 || (N) == 33) + +/* 1 if N is a possible register number for function argument passing. + On Alpha, these are $16-$21 and $f16-$f21. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha, this is a single integer, which is a number of words + of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* Define intermediate macro to compute the size (in registers) of an argument + for the Alpha. */ + +#define ALPHA_ARG_SIZE(MODE, TYPE, NAMED) \ + ((MODE) == TFmode || (MODE) == TCmode ? 1 \ + : (((MODE) == BLKmode ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) + +/* Make (or fake) .linkage entry for function call. + IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ + +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + +/* This macro produces the initial definition of a function. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + alpha_start_function(FILE,NAME,DECL); + +/* This macro closes up a function definition for the assembler. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE,NAME,DECL) \ + alpha_end_function(FILE,NAME,DECL) + +/* Output any profiling code before the prologue. */ + +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Never use profile counters. */ + +#define NO_PROFILE_COUNTERS 1 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. Under OSF/1, profiling is enabled + by simply passing -pg to the assembler and linker. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define registers used by the epilogue and return instruction. */ + +#define EPILOGUE_USES(REGNO) ((REGNO) == 26) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE 32 + +/* The alignment of a trampoline, in bits. 
*/ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of + the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ + +#define RETURN_ADDR_RTX alpha_return_addr + +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) + +/* Before the prologue, RA lives in $26. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) +#define DWARF_ZERO_REG 31 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, \ + crtl->outgoing_args_size)) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 \ + || (REGNO) == 63 || reg_renumber[REGNO] == 63) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* Recognize any constant value that is a valid address. For the Alpha, + the only valid constant addresses are small integer constants, since we + want to use LDA to load any symbolic addresses into registers. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONST_INT_P (X) \ + && (unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) 0 + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* ??? Nonzero if X is the frame pointer, or some virtual register + that may eliminate to the frame pointer. These will be allowed to + have offsets greater than 32K.
This is done because register + elimination offsets will change the hi/lo split, and if we split + before reload, we will require additional instructions. */ +#define NONSTRICT_REG_OK_FP_BASE_P(X) \ + (REGNO (X) == 31 || REGNO (X) == 63 \ + || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) +#else +#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) +#endif + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \ +do { \ + rtx new_x = alpha_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + + Do not define this if the table should contain absolute addresses. + On the Alpha, the table is really GP-relative, not relative to the PC + of the table, but we pretend that it is PC-relative; this should be OK, + but we should try to find some better way sometime. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move to or from memory + in one reasonably fast instruction. */ + +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. */ + +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) + +/* Largest number of bytes of an object that can be placed in a register. + On the Alpha we have plenty of registers, so use TImode. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. + + On the Alpha, we want to not use the byte operation and instead use + masking operations to access fields; these will save instructions. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. 
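+ + On the Alpha this always holds: a narrower value is simply the low-order bits of the 64-bit register that holds it, hence the constant 1 below.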
*/ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* The CIX ctlz and cttz instructions return 64 for zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) + +/* Define the value returned by a floating-point comparison instruction. */ + +#define FLOAT_STORE_FLAG_VALUE(MODE) \ + REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode DImode + +/* Mode of a function address in a call instruction (for indexing purposes). */ + +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. + + We define this on the Alpha so that gen_call and gen_call_value + get to see the SYMBOL_REF (for the hint field of the jsr). It will + then copy it into a register, thus actually letting the address be + cse'ed. */ + +#define NO_FUNCTION_CSE + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ +{"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \ + "$9", "$10", "$11", "$12", "$13", "$14", "$15", \ + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \ + "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", \ + "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",\ + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "FP"} + +/* Strip name encoding when emitting labels. */ + +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do { \ + const char *name_ = NAME; \ + if (*name_ == '@' || *name_ == '%') \ + name_ += 2; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ +} while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* Use dollar signs rather than periods in special g++ assembler names. */ + +#undef NO_DOLLAR_IN_LABEL + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*$%s%ld", (PREFIX), (long)(NUM)) + +/* This is how to output an element of a case-vector that is relative. 
*/ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.gprel32 $L%d\n", (VALUE)) + + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + +/* Determine which codes are valid without a following integer. These must + not be alphabetic. + + ~ Generates the name of the current function. + + / Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX + attributes are examined to determine what is appropriate. + + , Generates single precision suffix for floating point + instructions (s for IEEE, f for VAX) + + - Generates double precision suffix for floating point + instructions (t for IEEE, g for VAX) + */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ + ((CODE) == '/' || (CODE) == ',' || (CODE) == '-' || (CODE) == '~' \ + || (CODE) == '#' || (CODE) == '*' || (CODE) == '&') + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ + print_operand_address((FILE), (ADDR)) + +/* If we use NM, pass -g to it so it only lists globals. */ +#define NM_FLAGS "-pg" + +/* Definitions for debugging. */ + +/* Correct the offset of automatic variables and arguments. Note that + the Alpha debug format wants all automatic variables and arguments + to be in terms of two different offsets from the virtual frame pointer, + which is the stack pointer before any adjustment in the function. + The offset for the argument pointer is fixed for the native compiler, + it is either zero (for the no arguments case) or large enough to hold + all argument registers. + The offset for the auto pointer is the fourth argument to the .frame + directive (local_offset). + To stay compatible with the native tools we use the same offsets + from the virtual frame pointer and adjust the debugger arg/auto offsets + accordingly. These debugger offsets are set up in output_prolog. */ + +extern long alpha_arg_offset; +extern long alpha_auto_offset; +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + alpha_auto_offset) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + alpha_arg_offset) + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + alpha_output_filename (STREAM, NAME) + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* The system headers under Alpha systems are generally C++-aware. */ +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md new file mode 100644 index 000000000..795b4df3f --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.md @@ -0,0 +1,6113 @@ +;; Machine description for DEC Alpha for GNU C compiler +;; Copyright (C) 1992-2014 Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Uses of UNSPEC in this file: + +(define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE + UNSPEC_ARG_HOME + UNSPEC_LDGP1 + UNSPEC_INSXH + UNSPEC_MSKXH + UNSPEC_CVTQL + UNSPEC_CVTLQ + UNSPEC_LDGP2 + UNSPEC_LITERAL + UNSPEC_LITUSE + UNSPEC_SIBCALL + UNSPEC_SYMBOL + + ;; TLS Support + UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL + UNSPEC_TLSGD + UNSPEC_TLSLDM + UNSPEC_DTPREL + UNSPEC_TPREL + UNSPEC_TP + + ;; Builtins + UNSPEC_CMPBGE + UNSPEC_ZAP + UNSPEC_AMASK + UNSPEC_IMPLVER + UNSPEC_PERR + UNSPEC_COPYSIGN + + ;; Atomic operations + UNSPEC_MB + UNSPEC_ATOMIC + UNSPEC_CMPXCHG + UNSPEC_XCHG +]) + +;; UNSPEC_VOLATILE: + +(define_c_enum "unspecv" [ + UNSPECV_IMB + UNSPECV_BLOCKAGE + UNSPECV_SETJMPR ; builtin_setjmp_receiver + UNSPECV_LONGJMP ; builtin_longjmp + UNSPECV_TRAPB + UNSPECV_PSPL ; prologue_stack_probe_loop + UNSPECV_REALIGN + UNSPECV_EHR ; exception_receiver + UNSPECV_MCOUNT + UNSPECV_FORCE_MOV + UNSPECV_LDGP1 + UNSPECV_PLDGP2 ; prologue ldgp + UNSPECV_SET_TP + UNSPECV_RPCC + UNSPECV_SETJMPR_ER ; builtin_setjmp_receiver fragment + UNSPECV_LL ; load-locked + UNSPECV_SC ; store-conditional + UNSPECV_CMPXCHG +]) + +;; On non-BWX targets, CQImode must be handled the similarly to HImode +;; when generating reloads. +(define_mode_iterator RELOAD12 [QI HI CQI]) +(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) + +;; Other mode iterators +(define_mode_iterator IMODE [QI HI SI DI]) +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I124MODE [QI HI SI]) +(define_mode_iterator I24MODE [HI SI]) +(define_mode_iterator I248MODE [HI SI DI]) +(define_mode_iterator I48MODE [SI DI]) + +(define_mode_attr DWI [(SI "DI") (DI "TI")]) +(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q") + (V8QI "b8") (V4HI "w4") + (SF "%,") (DF "%-")]) +(define_mode_attr vecmodesuffix [(QI "b8") (HI "w4")]) + +(define_code_iterator any_maxmin [smax smin umax umin]) + +(define_code_attr maxmin [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) + +;; Where necessary, the suffixes _le and _be are used to distinguish between +;; little-endian and big-endian patterns. +;; +;; Note that the Unicos/Mk assembler does not support the following +;; opcodes: mov, fmov, nop, fnop, unop. + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in alpha.h. + +(define_attr "tune" "ev4,ev5,ev6" + (const (symbol_ref "((enum attr_tune) alpha_tune)"))) + +;; Define an insn type attribute. This is used in function unit delay +;; computations, among other purposes. For the most part, we use the names +;; defined in the EV4 documentation, but add a few that we have to know about +;; separately. + +(define_attr "type" + "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,jsr,iadd,ilog,shift,icmov,fcmov, + icmp,imul,fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, + multi,none" + (const_string "iadd")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + +;; Define the operand size an insn operates on. Used primarily by mul +;; and div operations that have size dependent timings. + +(define_attr "opsize" "si,di,udi" + (const_string "di")) + +;; The TRAP attribute marks instructions that may generate traps +;; (which are imprecise and may need a trapb if software completion +;; is desired). 
+ +(define_attr "trap" "no,yes" + (const_string "no")) + +;; The ROUND_SUFFIX attribute marks which instructions require a +;; rounding-mode suffix. The value NONE indicates no suffix, +;; the value NORMAL indicates a suffix controlled by alpha_fprm. + +(define_attr "round_suffix" "none,normal,c" + (const_string "none")) + +;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: +;; NONE no suffix +;; SU accepts only /su (cmpt et al) +;; SUI accepts only /sui (cvtqt and cvtqs) +;; V_SV accepts /v and /sv (cvtql only) +;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) +;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) +;; +;; The actual suffix emitted is controlled by alpha_fptm. + +(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" + (const_string "none")) + +;; The length of an instruction sequence in bytes. + +(define_attr "length" "" + (const_int 4)) + +;; The USEGP attribute marks instructions that have relocations that use +;; the GP. + +(define_attr "usegp" "no,yes" + (cond [(eq_attr "type" "ldsym,jsr") + (const_string "yes") + (eq_attr "type" "ild,fld,ist,fst") + (symbol_ref "((enum attr_usegp) alpha_find_lo_sum_using_gp (insn))") + ] + (const_string "no"))) + +;; The CANNOT_COPY attribute marks instructions with relocations that +;; cannot easily be duplicated. This includes insns with gpdisp relocs +;; since they have to stay in 1-1 correspondence with one another. This +;; also includes jsr insns, since they must stay in correspondence with +;; the immediately following gpdisp instructions. + +(define_attr "cannot_copy" "false,true" + (const_string "false")) + +;; Used to control the "enabled" attribute on a per-instruction basis. +;; For convenience, conflate ABI issues re loading of addresses with +;; an "isa". +(define_attr "isa" "base,bwx,max,fix,cix,vms,ner,er" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "bwx") (symbol_ref "TARGET_BWX") + (eq_attr "isa" "max") (symbol_ref "TARGET_MAX") + (eq_attr "isa" "fix") (symbol_ref "TARGET_FIX") + (eq_attr "isa" "cix") (symbol_ref "TARGET_CIX") + (eq_attr "isa" "vms") (symbol_ref "TARGET_ABI_OPEN_VMS") + (eq_attr "isa" "ner") (symbol_ref "!TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "er") (symbol_ref "TARGET_EXPLICIT_RELOCS") + ] + (const_int 1))) + +;; Include scheduling descriptions. + +(include "ev4.md") +(include "ev5.md") +(include "ev6.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; First define the arithmetic insns. Note that the 32-bit forms also +;; sign-extend. + +;; Handle 32-64 bit extension from memory to a floating point register +;; specially, since this occurs frequently in int->double conversions. +;; +;; Note that while we must retain the =f case in the insn for reload's +;; benefit, it should be eliminated after reload, so we should never emit +;; code for that case. But we don't reject the possibility. 
+ +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) + +(define_insn "*cvtlq" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTLQ))] + "" + "cvtlq %1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") + (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + addl $31,%1,%0 + ldl %0,%1 + lds %0,%1\;cvtlq %0,%0" + [(set_attr "type" "iadd,ild,fld") + (set_attr "length" "*,*,8")]) + +(define_split + [(set (match_operand:DI 0 "hard_fp_register_operand") + (sign_extend:DI (match_operand:SI 1 "memory_operand")))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +;; Optimize sign-extension of SImode loads. This shows up in the wake of +;; reload when converting fp->int. + +(define_peephole2 + [(set (match_operand:SI 0 "hard_int_register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 2 "hard_int_register_operand") + (sign_extend:DI (match_dup 0)))] + "true_regnum (operands[0]) == true_regnum (operands[2]) + || peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1)))]) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") + (match_operand:SI 2 "add_operand" "rI,O,K,L")))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0 + lda %0,%2(%r1) + ldah %0,%h2(%r1)") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "! add_operand (operands[2], SImode)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +}) + +(define_insn "*addsi_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_insn "*addsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O")) + 0)))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_not_elim_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (match_operand:SI 3 "reg_not_elim_operand"))] + "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) % 4 == 0" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) + (match_dup 5)) + (match_dup 1))))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]) / 4; + int mult = 4; + + if (val % 2 == 0) + val /= 2, mult = 8; + + operands[4] = GEN_INT (val); + operands[5] = GEN_INT (mult); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "add_operand")))) + (clobber (match_operand:DI 5 "register_operand"))] + "" + [(set (match_dup 5) (match_dup 6)) + (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] +{ + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[7] = gen_lowpart (SImode, operands[5]); +}) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "add_operand")))]) + +(define_insn "*adddi_er_lo16_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp16_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprel") + +(define_insn "*adddi_er_hi32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "dtp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!dtprelhi") + +(define_insn "*adddi_er_lo32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp32_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprello") + +(define_insn "*adddi_er_lo16_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp16_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprel") + +(define_insn "*adddi_er_hi32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "tp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!tprelhi") + +(define_insn "*adddi_er_lo32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp32_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprello") + +(define_insn "*adddi_er_high_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "local_symbolic_operand"))))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + "ldah %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (high:DI (match_operand:DI 1 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +;; We used to expend quite a lot of effort choosing addq/subq/lda. +;; With complications like +;; +;; The NT stack unwind code can't handle a subq to adjust the stack +;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3, +;; the exception handling code will loop if a subq is used and an +;; exception occurs. 
+;; +;; The 19980616 change to emit prologues as RTL also confused some +;; versions of GDB, which also interprets prologues. This has been +;; fixed as of GDB 4.18, but it does not harm to unconditionally +;; use lda here. +;; +;; and the fact that the three insns schedule exactly the same, it's +;; just not worth the effort. + +(define_insn "*adddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") + (match_operand:DI 2 "add_operand" "r,K,L")))] + "" + "@ + addq %1,%2,%0 + lda %0,%2(%1) + ldah %0,%h2(%1)") + +;; ??? Allow large constants when basing off the frame pointer or some +;; virtual register that may eliminate to the frame pointer. This is +;; done because register elimination offsets will change the hi/lo split, +;; and if we split before reload, we will require additional instructions. + +(define_insn "*adddi_fp_hack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") + (match_operand:DI 2 "const_int_operand" "K,L,n")))] + "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) + && INTVAL (operands[2]) >= 0 + /* This is the largest constant an lda+ldah pair can add, minus + an upper bound on the displacement between SP and AP during + register elimination. See INITIAL_ELIMINATION_OFFSET. */ + && INTVAL (operands[2]) + < (0x7fff8000 + - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD + - ALPHA_ROUND(crtl->outgoing_args_size) + - (ALPHA_ROUND (get_frame_size () + + max_reg_num () * UNITS_PER_WORD + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size))" + "@ + lda %0,%2(%1) + ldah %0,%h2(%1) + #") + +;; Don't do this if we are adjusting SP since we don't want to do it +;; in two steps. Don't split FP sources for the reason listed above. +(define_split + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! 
add_operand (operands[2], DImode) + && operands[0] != stack_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[1] != arg_pointer_rtx" + [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + rtx rest_rtx = GEN_INT (rest); + + operands[4] = GEN_INT (low); + if (satisfies_constraint_L (rest_rtx)) + operands[3] = rest_rtx; + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + +(define_insn "*sadd" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (plus:I48MODE + (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r,r") + (match_operand:I48MODE 2 "const48_operand" "I,I")) + (match_operand:I48MODE 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2add %1,%3,%0 + s%2sub %1,%n3,%0") + +(define_insn "*saddl_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "const48_operand")) + (match_operand:SI 5 "sext_add_operand")))) + (clobber (match_operand:DI 6 "reg_not_elim_operand"))] + "" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (match_dup 5))))] +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[8] = gen_lowpart (SImode, operands[6]); +}) + +(define_insn "addv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (plus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:I48MODE 2 "sext_add_operand" "rI,O"))) + (trap_if (ne (plus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (plus:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + addv %r1,%2,%0 + subv %r1,%n2,%0") + +(define_insn "neg2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")))] + "" + "sub $31,%1,%0") + +(define_insn "*negsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (neg:SI + (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] + "" + "subl $31,%1,%0") + +(define_insn "negv2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "register_operand" "r"))) + (trap_if (ne (neg: (sign_extend: (match_dup 1))) + (sign_extend: (neg:I48MODE (match_dup 1)))) + (const_int 0))] + "" + "subv $31,%1,%0") + +(define_insn "sub3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "sub %r1,%2,%0") + +(define_insn "*subsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" 
"rI"))))] + "" + "subl %r1,%2,%0") + +(define_insn "*subsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")) + 0)))] + "" + "subl %r1,%2,%0") + +(define_insn "*ssub" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE + (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r") + (match_operand:I48MODE 2 "const48_operand" "I")) + (match_operand:I48MODE 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2sub %1,%3,%0") + +(define_insn "*ssubl_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + "" + "s%2subl %1,%3,%0") + +(define_insn "subv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (minus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (minus:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "subv %r1,%2,%0") + +(define_insn "mul3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "mul %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_insn "*mulsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "mull %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_insn "mulv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (mult: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (mult:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "mulv %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_expand "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand")) + (match_operand:DI 2 "reg_or_8bit_operand")) + (const_int 64))))] + "" +{ + if (REG_P (operands[2])) + operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); +}) + +(define_insn "*umuldi3_highpart_reg" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_insn "*umuldi3_highpart_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) + (match_operand:TI 2 "cint8_operand" "I")) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_expand "umulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI + (zero_extend:TI (match_operand:DI 1 "reg_no_subreg_operand")) + (zero_extend:TI (match_operand:DI 
2 "reg_no_subreg_operand"))))] + "" +{ + rtx l = gen_reg_rtx (DImode), h = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (l, operands[1], operands[2])); + emit_insn (gen_umuldi3_highpart (h, operands[1], operands[2])); + emit_move_insn (gen_lowpart (DImode, operands[0]), l); + emit_move_insn (gen_highpart (DImode, operands[0]), h); + DONE; +}) + +;; The divide and remainder operations take their inputs from r24 and +;; r25, put their output in r27, and clobber r23 and r28 on all systems. +;; +;; ??? Force sign-extension here because some versions of OSF/1 and +;; Interix/NT don't do the right thing if the inputs are not properly +;; sign-extended. But Linux, for instance, does not have this +;; problem. Is it worth the complication here to eliminate the sign +;; extension? + +(define_code_iterator any_divmod [div mod udiv umod]) + +(define_expand "si3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand"))) + (parallel [(set (match_dup 5) + (sign_extend:DI + (any_divmod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand") + (subreg:SI (match_dup 5) 0))] + "TARGET_ABI_OSF" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "di3" + [(parallel [(set (match_operand:DI 0 "register_operand") + (any_divmod:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand"))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "TARGET_ABI_OSF") + +;; Lengths of 8 for ldq $t12,__divq($gp); jsr $t9,($t12),__divq as +;; expanded by the assembler. + +(define_insn_and_split "*divmodsi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extend:DI (match_dup 3))) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + break; + case UDIV: + str = "__divlu"; + break; + case MOD: + str = "__reml"; + break; + case UMOD: + str = "__remlu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmodsi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmodsi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 
"register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_ABI_OSF" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn_and_split "*divmoddi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (match_dup 3)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divq"; + break; + case UDIV: + str = "__divqu"; + break; + case MOD: + str = "__remq"; + break; + case UMOD: + str = "__remqu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmoddi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmoddi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_ABI_OSF" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +;; Next are the basic logical operations. We only expose the DImode operations +;; to the rtl expanders, but SImode versions exist for combine as well as for +;; the atomic operation splitters. + +(define_insn "*andsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:SI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:DI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +;; There are times when we can split an AND into two AND insns. This occurs +;; when we can first clear any bytes and then clear anything else. For +;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". +;; Only do this when running on 64-bit host since the computations are +;; too messy otherwise. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "HOST_BITS_PER_WIDE_INT == 64 && ! 
and_operand (operands[2], DImode)" + [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] +{ + unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); + unsigned HOST_WIDE_INT mask2 = mask1; + int i; + + /* For each byte that isn't all zeros, make it all ones. */ + for (i = 0; i < 64; i += 8) + if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) + mask1 |= (HOST_WIDE_INT) 0xff << i; + + /* Now turn on any bits we've just turned off. */ + mask2 |= ~ mask1; + + operands[3] = GEN_INT (mask1); + operands[4] = GEN_INT (mask2); +}) + +(define_insn "zero_extendqi2" + [(set (match_operand:I248MODE 0 "register_operand" "=r,r") + (zero_extend:I248MODE + (match_operand:QI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + and %1,0xff,%0 + ldbu %0,%1" + [(set_attr "type" "ilog,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendhi2" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (zero_extend:I48MODE + (match_operand:HI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + zapnot %1,3,%0 + ldwu %0,%1" + [(set_attr "type" "shift,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0" + [(set_attr "type" "shift")]) + +(define_insn "andnot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (and:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*one_cmplsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (ior:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (not:I48MODE (xor:I48MODE + (match_operand:I48MODE 1 "register_operand" "%rJ") + 
(match_operand:I48MODE 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +;; Handle FFS and related insns iff we support CIX. + +(define_expand "ffsdi2" + [(set (match_dup 2) + (ctz:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 3) + (plus:DI (match_dup 2) (const_int 1))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 1) (const_int 0)) + (const_int 0) (match_dup 3)))] + "TARGET_CIX" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand") + (bswap:SI (match_operand:SI 1 "register_operand")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + emit_insn (gen_inslh (t0, gen_lowpart (DImode, operands[1]), GEN_INT (7))); + emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), + GEN_INT (24))); + emit_insn (gen_iordi3 (t1, t0, t1)); + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x5))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xa))); + emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), + gen_lowpart (SImode, t1))); + DONE; +}) + +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand") + (bswap:DI (match_operand:DI 1 "register_operand")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + /* This method of shifting and masking is not specific to Alpha, but + is only profitable on Alpha because of our handy byte zap insn. */ + + emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); + emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xcc))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x33))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xaa))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x55))); + emit_insn (gen_iordi3 (operands[0], t0, t1)); + DONE; +}) + +;; Next come the shifts and the various extract and insert operations. 
+ +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] + "" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addq %r1,%r1,%0"; + else + return "s%P2addq %r1,0,%0"; + case 1: + return "sll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_insn "*ashldi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "const_int_operand" "P")) + 0)))] + "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3" +{ + if (operands[2] == const1_rtx) + return "addl %r1,%r1,%0"; + else + return "s%P2addl %r1,0,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "srl %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "sra %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendqi2" + [(set (match_operand:I24MODE 0 "register_operand" "=r") + (sign_extend:I24MODE + (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:QI 1 "some_operand")))] + "" +{ + if (TARGET_BWX) + operands[1] = force_reg (QImode, operands[1]); + else + { + rtx x, t1, t2, i56; + + if (unaligned_memory_operand (operands[1], QImode)) + { + x = gen_unaligned_extendqidi (operands[0], XEXP (operands[1], 0)); + alpha_set_memflags (x, operands[1]); + emit_insn (x); + DONE; + } + + t1 = gen_reg_rtx (DImode); + t2 = gen_reg_rtx (DImode); + i56 = GEN_INT (56); + + x = gen_lowpart (DImode, force_reg (QImode, operands[1])); + emit_move_insn (t1, x); + emit_insn (gen_ashldi3 (t2, t1, i56)); + emit_insn (gen_ashrdi3 (operands[0], t2, i56)); + DONE; + } +}) + +(define_insn "*extendqidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendhidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:HI 1 "some_operand")))] + "" +{ + if (TARGET_BWX) + operands[1] = force_reg (HImode, operands[1]); + else + { + rtx x, t1, t2, i48; + + if (unaligned_memory_operand (operands[1], HImode)) + { + x = gen_unaligned_extendhidi (operands[0], XEXP (operands[1], 0)); + alpha_set_memflags (x, operands[1]); + emit_insn (x); + DONE; + } + + t1 = gen_reg_rtx (DImode); + t2 = gen_reg_rtx (DImode); + i48 = GEN_INT (48); + + x = gen_lowpart (DImode, force_reg (HImode, operands[1])); + emit_move_insn (t1, x); + emit_insn (gen_ashldi3 (t2, t1, i48)); + emit_insn (gen_ashrdi3 (operands[0], t2, i48)); + DONE; + } +}) + +(define_insn "*extendhidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + 
(sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +;; Here's how we sign extend an unaligned byte and halfword. Doing this +;; as a pattern saves one instruction. The code is similar to that for +;; the unaligned loads (see below). +;; +;; Operand 1 is the address, operand 0 is the result. + +(define_expand "unaligned_extendqidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:QI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:HI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 2); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_insn "*extxl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "mul8_operand" "I")))] + "" + "ext%M2l %r1,%s3,%0" + [(set_attr "type" "shift")]) + +(define_insn "extxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "ext%M2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; Combine has some strange notion of preserving existing undefined behavior +;; in shifts larger than a word size. So capture these patterns that it +;; should have turned into zero_extracts. 
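[Editor's sketch, not part of the patch.] Before those combine-captured forms, here is a rough C model, under my reading of the expanders above, of how unaligned_extendqidi sign-extends a byte without byte memory access. Little-endian layout is assumed, the helper name is hypothetical, and the sketch relies on GCC's arithmetic right shift of signed values and ignores strict-aliasing concerns:

  #include <stdint.h>

  /* Fetch the enclosing aligned quadword (like ldq_u), shift the wanted
     byte to the top, then shift it back down arithmetically so it
     arrives sign-extended.  A byte never straddles a quadword, so no
     high-part insn is needed.  */
  static int64_t
  sext_byte_unaligned (const unsigned char *p)
  {
    uintptr_t a = (uintptr_t) p;
    uint64_t q = *(const uint64_t *) (a & ~(uintptr_t) 7);  /* ldq_u */
    unsigned sh = 8 * (unsigned) (a & 7);   /* bit offset of the byte */
    return (int64_t) (q << (56 - sh)) >> 56;
  }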
+ +(define_insn "*extxl_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "" + "ext%U3l %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*extql_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "extql %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extqh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 65535)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extwh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 2147483647)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extlh %r1,%2,%0" + [(set_attr "type" "shift")]) + +;; This converts an extXl into an extXh with an appropriate adjustment +;; to the address calculation. + +;;(define_split +;; [(set (match_operand:DI 0 "register_operand") +;; (ashift:DI (zero_extract:DI (match_operand:DI 1 "register_operand") +;; (match_operand:DI 2 "mode_width_operand") +;; (ashift:DI (match_operand:DI 3) +;; (const_int 3))) +;; (match_operand:DI 4 "const_int_operand"))) +;; (clobber (match_operand:DI 5 "register_operand"))] +;; "INTVAL (operands[4]) == 64 - INTVAL (operands[2])" +;; [(set (match_dup 5) (match_dup 6)) +;; (set (match_dup 0) +;; (ashift:DI (zero_extract:DI (match_dup 1) (match_dup 2) +;; (ashift:DI (plus:DI (match_dup 5) +;; (match_dup 7)) +;; (const_int 3))) +;; (match_dup 4)))] +;; " +;;{ +;; operands[6] = plus_constant (DImode, operands[3], +;; INTVAL (operands[2]) / BITS_PER_UNIT); +;; operands[7] = GEN_INT (- INTVAL (operands[2]) / BITS_PER_UNIT); +;;}") + +(define_insn "insl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:I124MODE 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "insl %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:I124MODE 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insql %1,%2,%0" + [(set_attr "type" "shift")]) + +;; Combine has this sometimes habit of moving the and outside of the +;; shift, making life more interesting. 
+ +(define_insn "*insxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mul8_operand" "I")) + (match_operand:DI 3 "immediate_operand" "i")))] + "HOST_BITS_PER_WIDE_INT == 64 + && CONST_INT_P (operands[3]) + && (((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])))" +{ +#if HOST_BITS_PER_WIDE_INT == 64 + if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insbl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inswl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insll %1,%s2,%0"; +#endif + gcc_unreachable (); +} + [(set_attr "type" "shift")]) + +;; We do not include the insXh insns because they are complex to express +;; and it does not appear that we would ever want to generate them. +;; +;; Since we need them for block moves, though, cop out and use unspec. + +(define_insn "insxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_INSXH))] + "" + "ins%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +(define_insn "mskxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "msk%U2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; We do not include the mskXh insns because it does not appear we would +;; ever generate one. +;; +;; Again, we do for block moves and we use unspec again. + +(define_insn "mskxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_MSKXH))] + "" + "msk%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +;; Prefer AND + NE over LSHIFTRT + AND. + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") + +;; Floating-point operations. All the double-precision insns can extend +;; from single, so indicate that. The exception are the ones that simply +;; play with the sign bits; it's not clear what to do there. 
+ +(define_mode_iterator FMODE [SF DF]) + +(define_mode_attr opmode [(SF "si") (DF "di")]) + +(define_insn "abs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (abs:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*abstf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") + +(define_insn "neg2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (neg:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*negtf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_xordi3); DONE;") + +(define_insn "copysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "cpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "cpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*add3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "add%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "add3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "add%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr 
"trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "addtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (PLUS, operands); DONE;") + +(define_insn "*sub3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "sub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sub3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "sub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "subtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MINUS, operands); DONE;") + +(define_insn "*mul3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 
2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "mul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "mul3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "mul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "multf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MULT, operands); DONE;") + +(define_insn "*div3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "div%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "div3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "div%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < 
ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "divtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (DIV, operands); DONE;") + +(define_insn "*sqrt2_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU" + "sqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX" + "sqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +;; Define conversion operators between DFmode and SImode, using the cvtql +;; instruction. To allow combine et al to do useful things, we keep the +;; operation as a unit until after reload, at which point we split the +;; instructions. +;; +;; Note that we (attempt to) only consider this optimization when the +;; ultimate destination is memory. If we will be doing further integer +;; processing, it is cheaper to do the truncation in the int regs. + +(define_insn "*cvtql" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP" + "cvtql%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) + +(define_insn_and_split "*fix_truncdfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncdfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(match_dup 1)])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncdfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" 
"v_sv_svi")]) + +(define_insn "*fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +;; Likewise between SFmode and SImode. + +(define_insn_and_split "*fix_truncsfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] + "operands[5] = adjust_address (operands[0], SFmode, 0);" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncsfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(float_extend:DF (match_dup 1))])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncsfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_insn "*fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP") + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP") + +(define_expand "fix_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FIX, operands); DONE;") + +(define_expand "fixuns_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + 
"TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") + +(define_insn "*floatdisf_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsisf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +(define_insn_and_split "*floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 0)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); +}) + +(define_insn "*floatdidf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsidf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +(define_insn_and_split "*floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +(define_expand "floatditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + 
"TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT, operands); DONE;") + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:DI 1 "register_operand"))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:DI 1 "register_operand"))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_FP" +{ + if (alpha_fptm >= ALPHA_FPTM_SU) + operands[1] = force_reg (SFmode, operands[1]); +}) + +;; The Unicos/Mk assembler doesn't support cvtst, but we've already +;; asserted that alpha_fptm == ALPHA_FPTM_N. + +(define_insn "*extendsfdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtsts %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*extendsfdf2_internal" + [(set (match_operand:DF 0 "register_operand" "=f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + cpys %1,%1,%0 + ld%, %0,%1 + st%- %1,%0" + [(set_attr "type" "fcpys,fld,fst")]) + +;; Use register_operand for operand 1 to prevent compress_float_constant +;; from doing something silly. When optimizing we'll put things back +;; together anyway. 
+(define_expand "extendsftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:SF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + DONE; +}) + +(define_expand "extenddftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*truncdfsf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "trunctfdf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_expand "trunctfsf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmpf, sticky, arg, lo, hi; + + tmpf = gen_reg_rtx (DFmode); + sticky = gen_reg_rtx (DImode); + arg = copy_to_mode_reg (TFmode, operands[1]); + lo = gen_lowpart (DImode, arg); + hi = gen_highpart (DImode, arg); + + /* Convert the low word of the TFmode value into a sticky rounding bit, + then or it into the low bit of the high word. This leaves the sticky + bit at bit 48 of the fraction, which is representable in DFmode, + which prevents rounding error in the final conversion to SFmode. */ + + emit_insn (gen_rtx_SET (VOIDmode, sticky, + gen_rtx_NE (DImode, lo, const0_rtx))); + emit_insn (gen_iordi3 (hi, hi, sticky)); + emit_insn (gen_trunctfdf2 (tmpf, arg)); + emit_insn (gen_truncdfsf2 (operands[0], tmpf)); + DONE; +}) + +;; Next are all the integer comparisons, and conditional moves and branches +;; and some of the related define_expand's and define_split's. + +(define_insn "*setcc_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%C1 %2,%3,%0" + [(set_attr "type" "icmp")]) + +;; Yes, we can technically support reg_or_8bit_operand in operand 2, +;; but that's non-canonical rtl and allowing that causes inefficiencies +;; from cse on. 
+(define_insn "*setcc_swapped_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_swapped_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%c1 %r3,%2,%0" + [(set_attr "type" "icmp")]) + +;; Use match_operator rather than ne directly so that we can match +;; multiple integer modes. +(define_insn "*setne_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_CODE (operands[1]) == NE + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmpult $31,%2,%0" + [(set_attr "type" "icmp")]) + +;; The mode folding trick can't be used with const_int operands, since +;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... + +(define_insn "*movcc_internal" + [(set (match_operand:IMODE 0 "register_operand" "=r,r,r,r") + (if_then_else:IMODE + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:IMODE 1 "add_operand" "rI,0,rI,0") + (match_operand:IMODE 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbc" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbs" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +;; For ABS, we have two choices, depending on whether the input and output +;; registers are the same or not. 
+(define_expand "absdi2" + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "" +{ + if (rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); + else + emit_insn (gen_absdi2_diff (operands[0], operands[1])); + DONE; +}) + +(define_expand "absdi2_same" + [(set (match_operand:DI 1 "register_operand") + (neg:DI (match_operand:DI 0 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_expand "absdi2_diff" + [(set (match_operand:DI 0 "register_operand") + (neg:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_dup 0))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "! rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_dup 0)))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_operand:DI 1 "register_operand"))))] + "! 
rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_insn "3" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (any_maxmin:I12MODE + (match_operand:I12MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I12MODE 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + " %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "smaxdi3" + [(set (match_dup 3) + (le:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smax_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smax:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovlt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "smindi3" + [(set (match_dup 3) + (lt:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smin_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smin:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovgt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "umaxdi3" + [(set (match_dup 3) + (leu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_expand "umindi3" + [(set (match_dup 3) + (ltu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 
"reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*bcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (label_ref (match_operand 0)) + (pc)))] + "" + "b%C1 %r2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*bcc_reverse" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]) + + (pc) + (label_ref (match_operand 0))))] + "" + "b%c1 %2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbs_normal" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbs %r1,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbc_normal" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbc %r1,%0" + [(set_attr "type" "ibr")]) + +(define_split + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "comparison_operator" + [(zero_extract:DI (match_operand:DI 2 "register_operand") + (const_int 1) + (match_operand:DI 3 "const_int_operand")) + (const_int 0)]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand"))])] + "INTVAL (operands[3]) != 0" + [(set (match_dup 4) + (lshiftrt:DI (match_dup 2) (match_dup 3))) + (set (pc) + (if_then_else (match_op_dup 1 + [(zero_extract:DI (match_dup 4) + (const_int 1) + (const_int 0)) + (const_int 0)]) + (label_ref (match_dup 0)) + (pc)))] + ) + +;; The following are the corresponding floating-point insns. Recall +;; we need to have variants that expand the arguments from SFmode +;; to DFmode. 
+ +(define_insn "*cmpdf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_internal" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*movcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext1" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext3" + [(set (match_operand:SF 0 
"register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "smaxdf3" + [(set (match_dup 3) + (le:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smindf3" + [(set (match_dup 3) + (lt:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smaxsf3" + [(set (match_dup 3) + (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "sminsf3" + [(set (match_dup 3) + (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_insn "*fbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*fbcc_ext_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +;; These are the main define_expand's used to make conditional branches +;; and compares. 
+ +(define_expand "cbranchdf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP" + "alpha_emit_conditional_branch (operands, DFmode); DONE;") + +(define_expand "cbranchtf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:TF 1 "general_operand") + (match_operand:TF 2 "general_operand")])) + (use (match_operand 3))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_conditional_branch (operands, TFmode); DONE;") + +(define_expand "cbranchdi4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DI 1 "some_operand") + (match_operand:DI 2 "some_operand")])) + (use (match_operand 3))] + "" + "alpha_emit_conditional_branch (operands, DImode); DONE;") + +(define_expand "cstoredf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DF 2 "reg_or_0_operand") + (match_operand:DF 3 "reg_or_0_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_FP" +{ + if (alpha_emit_setcc (operands, DFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoretf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:TF 2 "general_operand") + (match_operand:TF 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + if (alpha_emit_setcc (operands, TFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoredi4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DI 2 "some_operand") + (match_operand:DI 3 "some_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "" +{ + if (alpha_emit_setcc (operands, DImode)) + DONE; + else + FAIL; +}) + +;; These are the main define_expand's used to make conditional moves. + +(define_expand "movcc" + [(set (match_operand:I48MODE 0 "register_operand") + (if_then_else:I48MODE + (match_operand 1 "comparison_operator") + (match_operand:I48MODE 2 "reg_or_8bit_operand") + (match_operand:I48MODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = alpha_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +(define_expand "movcc" + [(set (match_operand:FMODE 0 "register_operand") + (if_then_else:FMODE + (match_operand 1 "comparison_operator") + (match_operand:FMODE 2 "reg_or_8bit_operand") + (match_operand:FMODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = alpha_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +;; These define_split definitions are used in cases when comparisons have +;; not be stated in the correct way and we need to reverse the second +;; comparison. For example, x >= 7 has to be done as x < 6 with the +;; comparison that tests the result being reversed. We have one define_split +;; for each use of a comparison. They do not match valid insns and need +;; not generate valid insns. +;; +;; We can also handle equality comparisons (and inequality comparisons in +;; cases where the resulting add cannot overflow) by doing an add followed by +;; a comparison with zero. This is faster since the addition takes one +;; less cycle than a compare when feeding into a conditional move. +;; For this case, we also have an SImode pattern since we can merge the add +;; and sign extend and the order doesn't matter. +;; +;; We do not do this for floating-point, since it isn't clear how the "wrong" +;; operation could have been generated. 
+ +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (match_operand:DI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_cint_operand") + (match_operand:DI 5 "reg_or_cint_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + + /* If we are comparing for equality with a constant and that constant + appears in the arm when the register equals the constant, use the + register since that is more likely to match (and to produce better code + if both would). */ + + if (code == EQ && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[4], operands[3])) + operands[4] = operands[2]; + + else if (code == NE && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[5], operands[3])) + operands[5] = operands[2]; + + if (code == NE || code == EQ + || (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1)) + { + if (CONST_INT_P (operands[3])) + operands[7] = gen_rtx_PLUS (DImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); + + operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); + } + + else if (code == EQ || code == LE || code == LT + || code == LEU || code == LTU) + { + operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); + operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); + } + else + { + operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, + operands[2], operands[3]); + operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); + } +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand") + (match_operand:SI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_8bit_operand") + (match_operand:DI 5 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx + && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + rtx tem; + + if ((code != NE && code != EQ + && ! (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1))) + FAIL; + + if (CONST_INT_P (operands[3])) + tem = gen_rtx_PLUS (SImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); + + operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); + operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + operands[6], const0_rtx); +}) + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. 
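One concrete case of preferring compare-plus-arithmetic: the *cmp_sadd_di pattern further down turns a conditional add of 4 or 8 into a compare producing 0/1 followed by a scaled add, with no conditional move at all. An illustrative sketch (made-up function name; whether combine forms exactly this RTL depends on the surrounding code):

/* (x == 0 ? 8 : 0) + y can be combined into
     (plus (if_then_else (eq x 0) (const_int 8) (const_int 0)) y)
   which *cmp_sadd_di splits into  cmpeq x,0,t ; s8addq t,y,result.  */
long
bump_if_zero (long x, long y)
{
  return y + (x == 0 ? 8 : 0);
}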
+ +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (const_int 0)]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}) + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand")]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + 
(match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +;; Here are the CALL and unconditional branch insns. Calls on NT and OSF +;; work differently, so we have different patterns for each. + +(define_expand "call" + [(use (match_operand:DI 0)) + (use (match_operand 1)) + (use (match_operand 2)) + (use (match_operand 3))] + "" +{ + if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_vms (operands[0], operands[2])); + else + emit_call_insn (gen_call_osf (operands[0], operands[1])); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[0])); + operands[0] = XEXP (operands[0], 0); +}) + +(define_expand "call_osf" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (! 
call_operand (operands[0], Pmode)) + operands[0] = copy_to_mode_reg (Pmode, operands[0]); +}) + +;; +;; call openvms/alpha +;; op 0: symbol ref for called function +;; op 1: next_arg_reg (argument information value for R25) +;; +(define_expand "call_vms" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (match_dup 2)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[2] to PV in + both cases. */ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]); + if (GET_CODE (operands[0]) == SYMBOL_REF) + { + operands[2] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (Pmode, + operands[0], 8))); + operands[2] = operands[0]; + } +}) + +(define_expand "call_value" + [(use (match_operand 0)) + (use (match_operand:DI 1)) + (use (match_operand 2)) + (use (match_operand 3)) + (use (match_operand 4))] + "" +{ + if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_value_vms (operands[0], operands[1], + operands[3])); + else + emit_call_insn (gen_call_value_osf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[1])); + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "call_value_osf" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (! call_operand (operands[1], Pmode)) + operands[1] = copy_to_mode_reg (Pmode, operands[1]); +}) + +(define_expand "call_value_vms" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1)) + (match_operand 2))) + (use (match_dup 3)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[3] to PV in + both cases. 
*/ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]); + if (GET_CODE (operands[1]) == SYMBOL_REF) + { + operands[3] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (Pmode, + operands[1], 8))); + operands[3] = operands[1]; + } +}) + +(define_insn "*call_osf_1_er_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%0),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[0], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (use (reg:DI 29)) + (use (match_dup 0)) + (use (match_dup 3)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[3])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[3] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[0], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[4])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[4] = const0_rtx; + } + operands[3] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + +(define_insn "*call_osf_2_er_nogp" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (use (reg:DI 29)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3" + [(set_attr "type" "jsr")]) + +(define_insn "*call_osf_2_er" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3\;ldah $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_1_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%0..ng + jsr $26,%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%0..ng + jsr $26,%0\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +;; Note that the DEC assembler expands "jmp foo" with $at, which +;; doesn't do what we want. +(define_insn "*sibcall_osf_1" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%0..ng + lda $27,%0\;jmp $31,($27),%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. 
+(define_insn "*call_vms_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "r,s")) + (match_operand 1)) + (use (match_operand:DI 2 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %2,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [2] = alpha_use_linkage (operands [0], true, false); + operands [3] = alpha_use_linkage (operands [0], false, false); + return "ldq $26,%3\;ldq $27,%2\;jsr $26,%0\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0) + (const_int 0)) + (match_operand 1) + (match_operand 2)])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "none")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "br $31,%l0" + [(set_attr "type" "ibr")]) + +(define_expand "return" + [(return)] + "direct_return ()") + +(define_insn "*return_internal" + [(return)] + "reload_completed" + "ret $31,($26),1" + [(set_attr "type" "ibr")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "tablejump" + [(parallel [(set (pc) + (match_operand 0 "register_operand")) + (use (label_ref:DI (match_operand 1)))])] + "" +{ + if (TARGET_ABI_OSF) + { + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); + operands[0] = dest; + } +}) + +(define_insn "*tablejump_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref (match_operand 1)))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +;; Cache flush. Used by alpha_trampoline_init. 0x86 is PAL_imb, but we don't +;; want to have to include pal.h in our .s file. +(define_insn "imb" + [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] + "" + "call_pal 0x86" + [(set_attr "type" "callpal")]) + +(define_expand "clear_cache" + [(match_operand:DI 0) ; region start + (match_operand:DI 1)] ; region end + "" +{ + emit_insn (gen_imb ()); + DONE; +}) + +;; BUGCHK is documented common to OSF/1 and VMS PALcode. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "call_pal 0x81" + [(set_attr "type" "callpal")]) + +;; For userland, we load the thread pointer from the TCB. +;; For the kernel, we load the per-cpu private value. 
+ +(define_insn "get_thread_pointerdi" + [(set (match_operand:DI 0 "register_operand" "=v") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x32"; + else + return "call_pal 0x9e"; +} + [(set_attr "type" "callpal")]) + +;; For completeness, and possibly a __builtin function, here's how to +;; set the thread pointer. Since we don't describe enough of this +;; quantity for CSE, we have to use a volatile unspec, and then there's +;; not much point in creating an R16_REG register class. + +(define_expand "set_thread_pointerdi" + [(set (reg:DI 16) (match_operand:DI 0 "input_operand")) + (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF") + +(define_insn "*set_tp" + [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x31"; + else + return "call_pal 0x9f"; +} + [(set_attr "type" "callpal")]) + +;; Special builtins for establishing and reverting VMS condition handlers. + +(define_expand "builtin_establish_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand")) + (use (match_operand:DI 1 "address_operand"))] + "TARGET_ABI_OPEN_VMS" +{ + alpha_expand_builtin_establish_vms_condition_handler (operands[0], + operands[1]); +}) + +(define_expand "builtin_revert_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand"))] + "TARGET_ABI_OPEN_VMS" + "alpha_expand_builtin_revert_vms_condition_handler (operands[0]);") + +;; Finally, we have the basic data motion insns. The byte and word insns +;; are done via define_expand. Start with the floating-point insns, since +;; they are simpler. + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand") + (match_operand:SF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + "@ + cpys %R1,%R1,%0 + ld%, %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + st%, %R1,%0 + stl %r1,%0 + itofs %1,%0 + ftois %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand") + (match_operand:DF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_insn "*movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)" + "@ + cpys %R1,%R1,%0 + ld%- %0,%1 + bis $31,%r1,%0 + ldq %0,%1 + st%- %R1,%0 + stq %r1,%0 + itoft %1,%0 + ftoit %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +;; Subregs suck for register allocation. Pretend we can move TFmode +;; data between general registers until after reload. +;; ??? Is this still true now that we have the lower-subreg pass? + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand") + (match_operand:TF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! 
reg_or_0_operand (operands[1], TFmode)) + operands[1] = force_reg (TFmode, operands[1]); +}) + +(define_insn_and_split "*movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o") + (match_operand:TF 1 "input_operand" "roG,rG"))] + "register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "alpha_split_tmode_pair (operands, TFmode, true);") + +;; We do two major things here: handle mem->mem and construct long +;; constants. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (SImode, operands)) + DONE; +}) + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,m,r") + (match_operand:SI 1 "input_operand" "rJ,K,L,n,m,rJ,s"))] + "register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)" + "@ + bis $31,%r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + ldl %0,%1 + stl %r1,%0 + lda %0,%1" + [(set_attr "type" "ilog,iadd,iadd,multi,ild,ist,ldsym") + (set_attr "isa" "*,*,*,*,*,*,vms")]) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (SImode, operands)) + DONE; + else + FAIL; +}) + +(define_insn "*movdi_er_low_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "lda %0,%2(%1)\t\t!gprel"; + else + return "lda %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "small_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:DI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "local_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:DI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(match_operand 0 "some_small_symbolic_operand")] + "" + [(match_dup 0)] + "operands[0] = split_small_symbolic_operand (operands[0]);") + +;; Accepts any symbolic, not just global, since function calls that +;; don't go via bsr still use !literal in hopes of linker relaxation. 
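+;; For example (approximate assembly; the sequence number is arbitrary), an
+;; indirect call through the GOT pairs the two relocations
+;;
+;;	ldq $27,foo($29)	!literal!3
+;;	jsr $26,($27),foo	!lituse_jsr!3
+;;
+;; so that the linker may later relax the pair into a direct bsr to foo.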
+(define_insn "movdi_er_high_g" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldq %0,%2(%1)\t\t!literal"; + else + return "ldq %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "global_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movdi_er_tlsgd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "lda %0,%2(%1)\t\t!tlsgd"; + else + return "lda %0,%2(%1)\t\t!tlsgd!%3"; +}) + +(define_insn "movdi_er_tlsldm" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_TLSLDM))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[2]) == 0) + return "lda %0,%&(%1)\t\t!tlsldm"; + else + return "lda %0,%&(%1)\t\t!tlsldm!%2"; +}) + +(define_insn "*movdi_er_gotdtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_DTPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gotdtprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gotdtp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_DTPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_gottp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gottprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gottp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_TPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") + (match_operand:DI 1 "input_operand" + "rJ,K,L,T,s,n,s,m,rJ,*fJ, Q,*f,*f, r"))] + "register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)" + "@ + mov %r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + # + # + lda %0,%1 + ldq%A1 %0,%1 + stq%A0 %r1,%0 + fmov %R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,er,er,*,ner,*,*,*,*,*,fix,fix") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*,*")]) + +;; VMS needs to set up "vms_base_regno" for unwinding. 
This move +;; often appears dead to the life analysis code, at which point we +;; die for emitting dead prologue instructions. Force this live. + +(define_insn "force_movdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] + UNSPECV_FORCE_MOV))] + "" + "mov %1,%0" + [(set_attr "type" "ilog")]) + +;; We do three major things here: handle mem->mem, put 64-bit constants in +;; memory, and construct long 32-bit constants. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (DImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (DImode, operands)) + DONE; + else + FAIL; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (match_operand:TI 1 "input_operand" "roJ,rJ"))] + "(register_operand (operands[0], TImode) + /* Prevent rematerialization of constants. */ + && ! CONSTANT_P (operands[1])) + || reg_or_0_operand (operands[1], TImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "alpha_split_tmode_pair (operands, TImode, true);") + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + + if (operands[1] == const0_rtx) + ; + /* We must put 64-bit constants in memory. We could keep the + 32-bit constants in TImode and rely on the splitter, but + this doesn't seem to be worth the pain. */ + else if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE) + { + rtx in[2], out[2], target; + + gcc_assert (can_create_pseudo_p ()); + + split_double (operands[1], &in[0], &in[1]); + + if (in[0] == const0_rtx) + out[0] = const0_rtx; + else + { + out[0] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[0], in[0])); + } + + if (in[1] == const0_rtx) + out[1] = const0_rtx; + else + { + out[1] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[1], in[1])); + } + + if (!REG_P (operands[0])) + target = gen_reg_rtx (TImode); + else + target = operands[0]; + + emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); + emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); + + if (target != operands[0]) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], target)); + + DONE; + } +}) + +;; These are the partial-word cases. +;; +;; First we have the code to load an aligned word. Operand 0 is the register +;; in which to place the result. It's mode is QImode or HImode. Operand 1 +;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the +;; number of bits within the word that the value is. Operand 3 is an SImode +;; scratch register. If operand 0 is a hard register, operand 3 may be the +;; same register. It is allowed to conflict with operand 1 as well. 
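+;; Roughly, the two expanders below turn an aligned QImode/HImode load into
+;;
+;;	ldl $scratch,mem
+;;	extbl $scratch,N,$dest	# or extwl; N is the byte offset in the word
+;;
+;; with the extract written as a zero_extract of constant width and position.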
+ +(define_expand "aligned_loadqi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 8) + (match_operand:DI 2 "const_int_operand")))]) + +(define_expand "aligned_loadhi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 16) + (match_operand:DI 2 "const_int_operand")))]) + +;; Similar for unaligned loads, where we use the sequence from the +;; Alpha Architecture manual. We have to distinguish between little-endian +;; and big-endian systems as the sequences are different. +;; +;; Operand 1 is the address. Operands 2 and 3 are temporaries, where +;; operand 3 can overlap the input and output registers. + +(define_expand "unaligned_loadqi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 8) + (ashift:DI (match_dup 3) (const_int 3))))]) + +(define_expand "unaligned_loadhi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 16) + (ashift:DI (match_dup 3) (const_int 3))))]) + +;; Storing an aligned byte or word requires two temporaries. Operand 0 is the +;; aligned SImode MEM. Operand 1 is the register containing the +;; byte or word to store. Operand 2 is the number of bits within the word that +;; the value should be placed. Operands 3 and 4 are SImode temporaries. + +(define_expand "aligned_store" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 0 "memory_operand")) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) + (match_operand:DI 2 "const_int_operand"))) + (set (subreg:DI (match_dup 4) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (set (match_dup 0) (match_dup 4))] + "" +{ + operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + +;; For the unaligned byte and halfword cases, we use code similar to that +;; in the ;; Architecture book, but reordered to lower the number of registers +;; required. Operand 0 is the address. Operand 1 is the data to store. +;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may +;; be the same temporary, if desired. If the address is in a register, +;; operand 2 can be that register. 
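+;; For a byte store this corresponds approximately to the classic sequence
+;; from the architecture handbook (address in $a, data in $d, temporaries
+;; named after the operand numbers above):
+;;
+;;	ldq_u $t3,0($a)		# fetch the containing quadword
+;;	mskbl $t3,$a,$t3	# clear the target byte
+;;	insbl $d,$a,$t4		# shift the new byte into position
+;;	bis $t4,$t3,$t4		# merge
+;;	stq_u $t4,0($a)		# store the quadword back
+;;
+;; with inswl/mskwl doing the same job in the halfword case.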
+ +(define_expand "unaligned_store" + [(set (match_operand:DI 3 "register_operand") + (mem:DI (and:DI (match_operand:DI 0 "address_operand") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (match_dup 5) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (mode));") + +;; Here are the define_expand's for QI and HI moves that use the above +;; patterns. We have the normal sets, plus the ones that need scratch +;; registers for reload. + +(define_expand "mov" + [(set (match_operand:I12MODE 0 "nonimmediate_operand") + (match_operand:I12MODE 1 "general_operand"))] + "" +{ + if (TARGET_BWX + ? alpha_expand_mov (mode, operands) + : alpha_expand_mov_nobwx (mode, operands)) + DONE; +}) + +(define_insn "*movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode)" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldbu %0,%1 + stb %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +(define_insn "*movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode)" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldwu %0,%1 + stw %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +;; We need to hook into the extra support that we have for HImode +;; reloads when BWX insns are not available. +(define_expand "movcqi" + [(set (match_operand:CQI 0 "nonimmediate_operand") + (match_operand:CQI 1 "general_operand"))] + "!TARGET_BWX" +{ + if (GET_CODE (operands[0]) == CONCAT || GET_CODE (operands[1]) == CONCAT) + ; + else if (!any_memory_operand (operands[0], CQImode)) + { + if (!any_memory_operand (operands[1], CQImode)) + { + emit_move_insn (gen_lowpart (HImode, operands[0]), + gen_lowpart (HImode, operands[1])); + DONE; + } + if (aligned_memory_operand (operands[1], CQImode)) + { + bool done; + do_aligned1: + operands[1] = gen_lowpart (HImode, operands[1]); + do_aligned2: + operands[0] = gen_lowpart (HImode, operands[0]); + done = alpha_expand_mov_nobwx (HImode, operands); + gcc_assert (done); + DONE; + } + } + else if (aligned_memory_operand (operands[0], CQImode)) + { + if (MEM_P (operands[1])) + { + rtx x = gen_reg_rtx (HImode); + emit_move_insn (gen_lowpart (CQImode, x), operands[1]); + operands[1] = x; + goto do_aligned2; + } + goto do_aligned1; + } + + gcc_assert (!reload_in_progress); + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +;; Here are the versions for reload. +;; +;; The aligned input case is recognized early in alpha_secondary_reload +;; in order to avoid allocating an unnecessary scratch register. +;; +;; Note that in the unaligned cases we know that the operand must not be +;; a pseudo-register because stack slots are always aligned references. 
+ +(define_expand "reload_in" + [(parallel [(match_operand:RELOAD12 0 "register_operand" "=r") + (match_operand:RELOAD12 1 "any_memory_operand" "m") + (match_operand:TI 2 "register_operand" "=&r")])] + "!TARGET_BWX" +{ + rtx scratch, seq, addr; + unsigned regno = REGNO (operands[2]); + + /* It is possible that one of the registers we got for operands[2] + might coincide with that of operands[0] (which is why we made + it TImode). Pick the other one to use as our scratch. */ + if (regno == REGNO (operands[0])) + regno++; + scratch = gen_rtx_REG (DImode, regno); + + addr = get_unaligned_address (operands[1]); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + seq = gen_unaligned_load (operands[0], addr, + scratch, operands[0]); + alpha_set_memflags (seq, operands[1]); + + emit_insn (seq); + DONE; +}) + +(define_expand "reload_out" + [(parallel [(match_operand:RELOAD12 0 "any_memory_operand" "=m") + (match_operand:RELOAD12 1 "register_operand" "r") + (match_operand:TI 2 "register_operand" "=&r")])] + "!TARGET_BWX" +{ + unsigned regno = REGNO (operands[2]); + + if (mode == CQImode) + { + operands[0] = gen_lowpart (HImode, operands[0]); + operands[1] = gen_lowpart (HImode, operands[1]); + } + + if (aligned_memory_operand (operands[0], mode)) + { + emit_insn (gen_reload_out_aligned + (operands[0], operands[1], + gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, regno + 1))); + } + else + { + rtx addr = get_unaligned_address (operands[0]); + rtx scratch1 = gen_rtx_REG (DImode, regno); + rtx scratch2 = gen_rtx_REG (DImode, regno + 1); + rtx scratch3 = scratch1; + rtx seq; + + if (REG_P (addr)) + scratch1 = addr; + + seq = gen_unaligned_store (addr, operands[1], scratch1, + scratch2, scratch3); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + DONE; +}) + +;; Helpers for the above. The way reload is structured, we can't +;; always get a proper address for a stack slot during reload_foo +;; expansion, so we must delay our address manipulations until after. 
+ +(define_insn_and_split "reload_in_aligned" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (match_operand:I12MODE 1 "memory_operand" "m"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + emit_insn (gen_aligned_load + (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, + gen_rtx_REG (SImode, REGNO (operands[0])))); + DONE; +}) + +(define_insn_and_split "reload_out_aligned" + [(set (match_operand:I12MODE 0 "memory_operand" "=m") + (match_operand:I12MODE 1 "register_operand" "r")) + (clobber (match_operand:SI 2 "register_operand" "=r")) + (clobber (match_operand:SI 3 "register_operand" "=r"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + operands[2], operands[3])); + DONE; +}) + +;; Vector operations + +(define_mode_iterator VEC [V8QI V4HI V2SI]) +(define_mode_iterator VEC12 [V8QI V4HI]) + +(define_expand "mov" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (mode, operands)) + DONE; +}) + +(define_split + [(set (match_operand:VEC 0 "register_operand") + (match_operand:VEC 1 "non_zero_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (mode, operands)) + DONE; + else + FAIL; +}) + + +(define_expand "movmisalign" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + alpha_expand_movmisalign (mode, operands); + DONE; +}) + +(define_insn "*mov_fix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + "@ + bis $31,%r1,%0 + # + ldq %0,%1 + stq %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,*,*,*,*,fix,fix")]) + +(define_insn "3" + [(set (match_operand:VEC12 0 "register_operand" "=r") + (any_maxmin:VEC12 + (match_operand:VEC12 1 "reg_or_0_operand" "rW") + (match_operand:VEC12 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + " %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "one_cmpl2" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (match_operand:VEC 1 "register_operand" "r")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "and3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "and %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*andnot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bic %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "ior3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bis %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC 
(not:DI (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "ornot %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xor3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r"))))] + "" + "eqv %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_expand "vec_shl_" + [(set (match_operand:VEC 0 "register_operand") + (ashift:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "vec_shr_" + [(set (match_operand:VEC 0 "register_operand") + (lshiftrt:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +;; Bit field extract patterns which use ext[wlq][lh] + +(define_expand "extvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (sign_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 1); + DONE; +}) + +(define_expand "extzvdi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 8 + && INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; +}) + +(define_expand "extzvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. + We fail 8-bit fields, falling back on a simple byte load. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 0); + DONE; +}) + +(define_expand "insvmisaligndi" + [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand") + (match_operand:DI 1 "const_int_operand") + (match_operand:DI 2 "const_int_operand")) + (match_operand:DI 3 "register_operand"))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. 
*/ + if (INTVAL (operands[2]) % 8 != 0 + || (INTVAL (operands[1]) != 16 + && INTVAL (operands[1]) != 32 + && INTVAL (operands[1]) != 64)) + FAIL; + + alpha_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8); + DONE; +}) + +;; Block move/clear, see alpha.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "" +{ + if (alpha_expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_expand "movmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" + "operands[4] = gen_rtx_SYMBOL_REF (Pmode, \"OTS$MOVE\");") + +(define_insn "*movmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (match_operand:BLK 1 "memory_operand" "m,m")) + (use (match_operand:DI 2 "nonmemory_operand" "r,i")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_operand:DI 4 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [5] = alpha_use_linkage (operands [4], false, true); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%2,$17\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%2($31)\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "28")]) + +(define_expand "setmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (alpha_expand_block_clear (operands)) + DONE; + else + FAIL; +}) + +(define_expand "setmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" +{ + /* If value to set is not zero, use the library routine. 
*/ + if (operands[2] != const0_rtx) + FAIL; + + operands[4] = gen_rtx_SYMBOL_REF (Pmode, "OTS$ZERO"); +}) + +(define_insn "*clrmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (const_int 0)) + (use (match_operand:DI 1 "nonmemory_operand" "r,i")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [4] = alpha_use_linkage (operands [3], false, true); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%1,$17\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%1($31)\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "24")]) + + +;; Subroutine of stack space allocation. Perform a stack probe. +(define_expand "probe_stack" + [(set (match_dup 1) (match_operand:DI 0 "const_int_operand"))] + "" +{ + operands[1] = gen_rtx_MEM (DImode, plus_constant (Pmode, stack_pointer_rtx, + INTVAL (operands[0]))); + MEM_VOLATILE_P (operands[1]) = 1; + + operands[0] = const0_rtx; +}) + +;; This is how we allocate stack space. If we are allocating a +;; constant amount of space and we know it is less than 4096 +;; bytes, we need do nothing. +;; +;; If it is more than 4096 bytes, we need to probe the stack +;; periodically. +(define_expand "allocate_stack" + [(set (reg:DI 30) + (plus:DI (reg:DI 30) + (match_operand:DI 1 "reg_or_cint_operand"))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_dup 2))] + "" +{ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= 4096) + { + /* We do this the same way as in the prologue and generate explicit + probes. Then we update the stack by the constant. 
*/ + + int probed = 4096; + + emit_insn (gen_probe_stack (GEN_INT (- probed))); + while (probed + 8192 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- (probed += 8192)))); + + if (probed + 4096 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- INTVAL(operands[1])))); + } + + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = virtual_stack_dynamic_rtx; + } + else + { + rtx out_label = 0; + rtx loop_label = gen_label_rtx (); + rtx want = gen_reg_rtx (Pmode); + rtx tmp = gen_reg_rtx (Pmode); + rtx memref, test; + + emit_insn (gen_subdi3 (want, stack_pointer_rtx, + force_reg (Pmode, operands[1]))); + + if (!CONST_INT_P (operands[1])) + { + rtx limit = GEN_INT (4096); + out_label = gen_label_rtx (); + test = gen_rtx_LTU (VOIDmode, operands[1], limit); + emit_jump_insn + (gen_cbranchdi4 (test, operands[1], limit, out_label)); + } + + emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); + emit_label (loop_label); + memref = gen_rtx_MEM (DImode, tmp); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT(-8192))); + test = gen_rtx_GTU (VOIDmode, tmp, want); + emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); + + memref = gen_rtx_MEM (DImode, want); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + + if (out_label) + emit_label (out_label); + + emit_move_insn (stack_pointer_rtx, want); + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; + } +}) + +;; This is used by alpha_expand_prolog to do the same thing as above, +;; except we cannot at that time generate new basic blocks, so we hide +;; the loop in this one insn. + +(define_insn "prologue_stack_probe_loop" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPECV_PSPL)] + "" +{ + operands[2] = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[2])); + + return "stq $31,-8192(%1)\;subq %0,1,%0\;lda %1,-8192(%1)\;bne %0,%l2"; +} + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +(define_expand "prologue" + [(const_int 0)] + "" +{ + alpha_expand_prologue (); + DONE; +}) + +;; These take care of emitting the ldgp insn in the prologue. This will be +;; an lda/ldah pair and we want to align them properly. So we have two +;; unspec_volatile insns, the first of which emits the ldgp assembler macro +;; and the second of which emits nothing. However, both are marked as type +;; IADD (the default) so the alignment code in alpha.c does the right thing +;; with them. + +(define_expand "prologue_ldgp" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? 
GEN_INT (alpha_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "ldah %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "" + "ldgp %0,0(%1)\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "" + ) + +;; The _mcount profiling hook has special calling conventions, and +;; does not clobber all the registers that a normal call would. So +;; hide the fact this is a call at all. + +(define_insn "prologue_mcount" + [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + /* Note that we cannot use a lituse_jsr reloc, since _mcount + cannot be called via the PLT. */ + return "ldq $28,_mcount($29)\t\t!literal\;jsr $28,($28),_mcount"; + else + return "lda $28,_mcount\;jsr $28,($28),_mcount"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "init_fp" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] + "" + "bis $31,%1,%0") + +(define_expand "epilogue" + [(return)] + "" + "alpha_expand_epilogue ();") + +(define_expand "sibcall_epilogue" + [(return)] + "TARGET_ABI_OSF" +{ + alpha_expand_epilogue (); + DONE; +}) + +(define_expand "builtin_longjmp" + [(use (match_operand:DI 0 "register_operand" "r"))] + "TARGET_ABI_OSF" +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 8)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 16)); + rtx pv = gen_rtx_REG (Pmode, 27); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_move_insn (hard_frame_pointer_rtx, fp); + emit_move_insn (pv, lab); + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + /* Load the label we are jumping through into $27 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. 
*/ + emit_jump_insn (gen_builtin_longjmp_internal (pv)); + emit_barrier (); + DONE; +}) + +;; This is effectively a copy of indirect_jump, but constrained such +;; that register renaming cannot foil our cunning plan with $27. +(define_insn "builtin_longjmp_internal" + [(set (pc) + (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_LONGJMP))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "builtin_setjmp_receiver" + [(unspec_volatile [(label_ref (match_operand 0))] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF") + +(define_insn_and_split "*builtin_setjmp_receiver_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 1) + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) + (set (match_dup 1) + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), + UNSPECV_SETJMPR_ER)); + operands[1] = pic_offset_table_rtx; + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "12") + (set_attr "type" "multi")]) + +(define_insn "*builtin_setjmp_receiver_er_sl_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR_ER)] + "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS" + "lda $27,$LSJ%=-%l0($27)\n$LSJ%=:") + +;; When flag_reorder_blocks_and_partition is in effect, compiler puts +;; exception landing pads in a cold section. To prevent inter-section offset +;; calculation, a jump to original landing pad is emitted in the place of the +;; original landing pad. Since landing pad is moved, RA-relative GP +;; calculation in the prologue of landing pad breaks. To solve this problem, +;; we use alternative GP load approach. 
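+;; Concretely, as the patterns that follow show: with hot/cold partitioning
+;; the expander reloads $29 from a stack slot that the prologue saved via
+;; alpha_gp_save_rtx (), instead of recomputing it from the return address
+;; with the usual ldah/lda !gpdisp pair.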
+ +(define_expand "exception_receiver" + [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (flag_reorder_blocks_and_partition) + operands[0] = alpha_gp_save_rtx (); + else + operands[0] = const0_rtx; +}) + +(define_insn "*exception_receiver_2" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] + "TARGET_ABI_OSF && flag_reorder_blocks_and_partition" + "ldq $29,%0" + [(set_attr "type" "ild")]) + +(define_insn_and_split "*exception_receiver_1" + [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"; + else + return "ldgp $29,0($26)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec:DI [(match_dup 0) (match_dup 2)] UNSPEC_LDGP2))] +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 26); + operands[2] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "8") + (set_attr "type" "multi")]) + +(define_expand "nonlocal_goto_receiver" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (set (reg:DI 27) (mem:DI (reg:DI 29))) + (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (use (reg:DI 27))] + "TARGET_ABI_OPEN_VMS") + +(define_insn "arg_home" + [(unspec [(const_int 0)] UNSPEC_ARG_HOME) + (use (reg:DI 1)) + (use (reg:DI 25)) + (use (reg:DI 16)) + (use (reg:DI 17)) + (use (reg:DI 18)) + (use (reg:DI 19)) + (use (reg:DI 20)) + (use (reg:DI 21)) + (use (reg:DI 48)) + (use (reg:DI 49)) + (use (reg:DI 50)) + (use (reg:DI 51)) + (use (reg:DI 52)) + (use (reg:DI 53)) + (clobber (mem:BLK (const_int 0))) + (clobber (reg:DI 24)) + (clobber (reg:DI 25)) + (clobber (reg:DI 0))] + "TARGET_ABI_OPEN_VMS" + "lda $0,OTS$HOME_ARGS\;ldq $0,8($0)\;jsr $0,OTS$HOME_ARGS" + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +;; Prefetch data. +;; +;; On EV4, these instructions are nops -- no load occurs. +;; +;; On EV5, these instructions act as a normal load, and thus can trap +;; if the address is invalid. The OS may (or may not) handle this in +;; the entMM fault handler and suppress the fault. If so, then this +;; has the effect of a read prefetch instruction. +;; +;; On EV6, these become official prefetch instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "TARGET_FIXUP_EV5_PREFETCH || alpha_cpu == PROCESSOR_EV6" +{ + /* Interpret "no temporal locality" as this data should be evicted once + it is used. The "evict next" alternatives load the data into the cache + and leave the LRU eviction counter pointing to that block. */ + static const char * const alt[2][2] = { + { + "ldq $31,%a0", /* read, evict next */ + "ldl $31,%a0", /* read, evict last */ + }, + { + "ldt $f31,%a0", /* write, evict next */ + "lds $f31,%a0", /* write, evict last */ + } + }; + + bool write = INTVAL (operands[1]) != 0; + bool lru = INTVAL (operands[2]) != 0; + + return alt[write][lru]; +} + [(set_attr "type" "ild")]) + +;; Close the trap shadow of preceding instructions. This is generated +;; by alpha_reorg. + +(define_insn "trapb" + [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] + "" + "trapb" + [(set_attr "type" "misc")]) + +;; No-op instructions used by machine-dependent reorg to preserve +;; alignment for instruction issue. 
+;; The Unicos/Mk assembler does not support these opcodes. + +(define_insn "nop" + [(const_int 0)] + "" + "bis $31,$31,$31" + [(set_attr "type" "ilog")]) + +(define_insn "fnop" + [(const_int 1)] + "TARGET_FP" + "cpys $f31,$f31,$f31" + [(set_attr "type" "fcpys")]) + +(define_insn "unop" + [(const_int 2)] + "" + "ldq_u $31,0($30)") + +(define_insn "realign" + [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] + UNSPECV_REALIGN)] + "" + ".align %0 #realign") + +;; Instructions to be emitted from __builtins. + +(define_insn "builtin_cmpbge" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")] + UNSPEC_CMPBGE))] + "" + "cmpbge %r1,%2,%0" + ;; The EV6 data sheets list this as ILOG. OTOH, EV6 doesn't + ;; actually differentiate between ILOG and ICMP in the schedule. + [(set_attr "type" "icmp")]) + +(define_expand "extbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (8), operands[2])); + DONE; +}) + +(define_expand "extwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "extll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "extql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_insbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (QImode, operands[1]); + emit_insn (gen_insbl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (HImode, operands[1]); + emit_insn (gen_inswl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (SImode, operands[1]); + emit_insn (gen_insll (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "inswh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "inslh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "insqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh 
(operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "mskbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xffff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = immed_double_const (0xffffffff, 0, DImode); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = constm1_rtx; + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "msklh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "mskqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_zap" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_cint_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zap_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (and:DI (unspec:DI + [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] + "" + "@ + # + # + bis $31,$31,%0 + zap %r1,%2,%0" + [(set_attr "type" "shift,shift,ilog,shift")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "const_int_operand")))] + "" + [(const_int 0)] +{ + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + if (HOST_BITS_PER_WIDE_INT >= 64 || CONST_INT_P (mask)) + operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); + else + { + HOST_WIDE_INT c_lo = INTVAL (operands[1]); + HOST_WIDE_INT c_hi = (c_lo < 0 ? 
-1 : 0); + operands[1] = immed_double_const (c_lo & CONST_DOUBLE_LOW (mask), + c_hi & CONST_DOUBLE_HIGH (mask), + DImode); + } + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "register_operand")))] + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 2)))] +{ + operands[2] = alpha_expand_zap_mask (INTVAL (operands[2])); + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_expand "builtin_zapnot" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(not:QI (match_operand:DI 2 "reg_or_cint_operand"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (~ INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zapnot_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(not:QI (match_operand:QI 2 "register_operand" "r"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "zapnot %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_amask" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] + UNSPEC_AMASK))] + "" + "amask %1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_implver" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] + "" + "implver %0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_rpcc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] + "" + "rpcc %0" + [(set_attr "type" "ilog")]) + +(define_expand "builtin_minub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxub8" + [(match_operand:DI 0 
"register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_insn "builtin_perr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] + UNSPEC_PERR))] + "TARGET_MAX" + "perr %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pklb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:DI 1 "register_operand")) + (match_dup 2)) + (match_dup 3)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V2SImode, operands[1]); + operands[2] = CONST0_RTX (V2QImode); + operands[3] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pklb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) + (match_operand:V2QI 2 "const0_operand")) + (match_operand:V4QI 3 "const0_operand")))] + "TARGET_MAX" + "pklb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pkwb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (truncate:V4QI (match_operand:DI 1 "register_operand")) + (match_dup 2)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V4HImode, operands[1]); + operands[2] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pkwb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) + (match_operand:V4QI 2 "const0_operand")))] + "TARGET_MAX" + "pkwb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbl" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V2SImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbl" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" + "unpkbl %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbw" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:DI 1 "register_operand") + (parallel 
[(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V4HImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbw" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" + "unpkbw %r1,%0" + [(set_attr "type" "mvi")]) + +(include "sync.md") + +;; The call patterns are at the end of the file because their +;; wildcard operand0 interferes with nice recognition. + +(define_insn "*call_value_osf_1_er_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%1),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (use (reg:DI 29)) + (use (match_dup 1)) + (use (match_dup 4)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[4])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[4] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (match_dup 5)) + (clobber (reg:DI 26))]) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[5] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[5])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[5] = const0_rtx; + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[6] = pic_offset_table_rtx; +}) + +(define_insn "*call_value_osf_2_er_nogp" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (use (reg:DI 29)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4" + [(set_attr "type" "jsr")]) + +(define_insn "*call_value_osf_2_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4\;ldah $29,0($26)\t\t!gpdisp!%5" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_value_osf_1_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%1..ng + jsr $26,%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn_and_split "call_value_osf_tlsgd" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "call_value_osf_tlsldm" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn "*call_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + jsr $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +(define_insn "*sibcall_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%1..ng + lda $27,%1\;jmp $31,($27),%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. 
+(define_insn "*call_value_vms_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "r,s")) + (match_operand 2))) + (use (match_operand:DI 3 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %3,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [3] = alpha_use_linkage (operands [1], true, false); + operands [4] = alpha_use_linkage (operands [1], false, false); + return "ldq $26,%4\;ldq $27,%3\;jsr $26,%1\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) diff --git a/gcc-4.9/gcc/config/alpha/alpha.opt b/gcc-4.9/gcc/config/alpha/alpha.opt new file mode 100644 index 000000000..dc937ac66 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.opt @@ -0,0 +1,130 @@ +; Options for the DEC Alpha port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +msoft-float +Target Report Mask(SOFT_FP) +Do not use hardware fp + +mfp-regs +Target Report Mask(FPREGS) +Use fp registers + +mgas +Target Ignore +Does nothing. Preserved for backward compatibility. 
+ +mieee-conformant +Target RejectNegative Mask(IEEE_CONFORMANT) +Request IEEE-conformant math library routines (OSF/1) + +mieee +Target Report RejectNegative Mask(IEEE) +Emit IEEE-conformant code, without inexact exceptions + +mieee-with-inexact +Target Report RejectNegative Mask(IEEE_WITH_INEXACT) + +mbuild-constants +Target Report Mask(BUILD_CONSTANTS) +Do not emit complex integer constants to read-only memory + +mfloat-vax +Target Report RejectNegative Mask(FLOAT_VAX) +Use VAX fp + +mfloat-ieee +Target Report RejectNegative InverseMask(FLOAT_VAX) +Do not use VAX fp + +mbwx +Target Report Mask(BWX) +Emit code for the byte/word ISA extension + +mmax +Target Report Mask(MAX) +Emit code for the motion video ISA extension + +mfix +Target Report Mask(FIX) +Emit code for the fp move and sqrt ISA extension + +mcix +Target Report Mask(CIX) +Emit code for the counting ISA extension + +mexplicit-relocs +Target Report Mask(EXPLICIT_RELOCS) +Emit code using explicit relocation directives + +msmall-data +Target Report RejectNegative Mask(SMALL_DATA) +Emit 16-bit relocations to the small data areas + +mlarge-data +Target Report RejectNegative InverseMask(SMALL_DATA) +Emit 32-bit relocations to the small data areas + +msmall-text +Target Report RejectNegative Mask(SMALL_TEXT) +Emit direct branches to local functions + +mlarge-text +Target Report RejectNegative InverseMask(SMALL_TEXT) +Emit indirect branches to local functions + +mtls-kernel +Target Report Mask(TLS_KERNEL) +Emit rdval instead of rduniq for thread pointer + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) +Use 128-bit long double + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double + +mcpu= +Target RejectNegative Joined Var(alpha_cpu_string) +Use features of and schedule given CPU + +mtune= +Target RejectNegative Joined Var(alpha_tune_string) +Schedule given CPU + +mfp-rounding-mode= +Target RejectNegative Joined Var(alpha_fprm_string) +Control the generated fp rounding mode + +mfp-trap-mode= +Target RejectNegative Joined Var(alpha_fptm_string) +Control the IEEE trap mode + +mtrap-precision= +Target RejectNegative Joined Var(alpha_tp_string) +Control the precision given to fp exceptions + +mmemory-latency= +Target RejectNegative Joined Var(alpha_mlat_string) +Tune expected memory latency + +mtls-size= +Target RejectNegative Joined UInteger Var(alpha_tls_size) Init(32) +Specify bit size of immediate TLS offsets diff --git a/gcc-4.9/gcc/config/alpha/constraints.md b/gcc-4.9/gcc/config/alpha/constraints.md new file mode 100644 index 000000000..e67c9a9a0 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/constraints.md @@ -0,0 +1,120 @@ +;; Constraint definitions for DEC Alpha. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCDEF V YZ +;;; de ghijkl pq tu wxyz + +;; Integer register constraints. 
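+;;
+;; These letters are what the constraint strings in alpha.md's templates
+;; name; for example the OSF call patterns earlier in this patch use the
+;; alternatives "c,R,s", meaning the call address is either in $27 ('c'
+;; below), a direct-call symbol ('R', one of the "extra" constraints near
+;; the end of this file), or any symbolic constant (the generic 's'
+;; constraint):
+;;
+;;   (match_operand:DI 1 "call_operand" "c,R,s")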
+ +(define_register_constraint "a" "R24_REG" + "General register 24, input to division routine") + +(define_register_constraint "b" "R25_REG" + "General register 24, input to division routine") + +(define_register_constraint "c" "R27_REG" + "General register 27, function call address") + +(define_register_constraint "f" "TARGET_FPREGS ? FLOAT_REGS : NO_REGS" + "Any floating-point register") + +(define_register_constraint "v" "R0_REG" + "General register 0, function value return address") + +(define_memory_constraint "w" + "A memory whose address is only a register" + (match_operand 0 "mem_noofs_operand")) + +;; Integer constant constraints. +(define_constraint "I" + "An unsigned 8 bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "J" + "The constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 16-bit integer constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "L" + "A shifted signed 16-bit constant appropriate for LDAH" + (and (match_code "const_int") + (match_test "(ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0)"))) + +(define_constraint "M" + "A valid operand of a ZAP insn" + (and (match_code "const_int") + (match_test "zap_mask (ival) != 0"))) + +(define_constraint "N" + "A complemented unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (~ival, 0, 255)"))) + +(define_constraint "O" + "A negated unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (-ival, 0, 255)"))) + +(define_constraint "P" + "The constant 1, 2 or 3" + (and (match_code "const_int") + (match_test "ival == 1 || ival == 2 || ival == 3"))) + +(define_constraint "H" + "A valid operand of a ZAP insn, when building with 32-bit HOST_WIDE_INT" + (and (match_code "const_double") + (match_test "mode == VOIDmode && zap_mask (hval) && zap_mask (lval)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT + && op == CONST0_RTX (mode)"))) + +;; "Extra" constraints. +(define_constraint "Q" + "@internal A normal_memory_operand" + (match_operand 0 "normal_memory_operand")) + +(define_constraint "R" + "@internal A direct_call_operand" + (match_operand:DI 0 "direct_call_operand")) + +(define_constraint "S" + "An unsigned 6-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "T" + "@internal A high-part symbol" + (match_code "high")) + +(define_constraint "W" + "A vector zero constant" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) diff --git a/gcc-4.9/gcc/config/alpha/driver-alpha.c b/gcc-4.9/gcc/config/alpha/driver-alpha.c new file mode 100644 index 000000000..1981d0e6d --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/driver-alpha.c @@ -0,0 +1,99 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Arthur Loiret + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Chip family type IDs, returned by implver instruction. */ +#define IMPLVER_EV4_FAMILY 0 /* LCA/EV4/EV45 */ +#define IMPLVER_EV5_FAMILY 1 /* EV5/EV56/PCA56 */ +#define IMPLVER_EV6_FAMILY 2 /* EV6 */ +#define IMPLVER_EV7_FAMILY 3 /* EV7 */ + +/* Bit defines for amask instruction. */ +#define AMASK_BWX 0x1 /* byte/word extension. */ +#define AMASK_FIX 0x2 /* sqrt and f <-> i conversions + extension. */ +#define AMASK_CIX 0x4 /* count extension. */ +#define AMASK_MVI 0x100 /* multimedia extension. */ +#define AMASK_PRECISE 0x200 /* Precise arithmetic traps. */ +#define AMASK_LOCKPFTCHOK 0x1000 /* Safe to prefetch lock cache + block. */ + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "cpu" or "tune" as argument depending on if -mcpu=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-mcpu=ev6" on an Alpha 21264 for + -mcpu=native. If the routine can't detect a known processor, + the -mcpu or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + static const struct cpu_types { + long implver; + long amask; + const char *const cpu; + } cpu_types[] = { + { IMPLVER_EV7_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX|AMASK_CIX, "ev67" }, + { IMPLVER_EV6_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX|AMASK_CIX, "ev67" }, + { IMPLVER_EV6_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX, "ev6" }, + { IMPLVER_EV5_FAMILY, AMASK_BWX|AMASK_MVI, "pca56" }, + { IMPLVER_EV5_FAMILY, AMASK_BWX, "ev56" }, + { IMPLVER_EV5_FAMILY, 0, "ev5" }, + { IMPLVER_EV4_FAMILY, 0, "ev4" }, + { 0, 0, NULL } + }; + long implver; + long amask; + const char *cpu; + int i; + + if (argc < 1) + return NULL; + + if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) + return NULL; + + implver = __builtin_alpha_implver (); + amask = __builtin_alpha_amask (~0L); + cpu = NULL; + + for (i = 0; cpu_types[i].cpu != NULL; i++) + if (implver == cpu_types[i].implver + && (~amask & cpu_types[i].amask) == cpu_types[i].amask) + { + cpu = cpu_types[i].cpu; + break; + } + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc-4.9/gcc/config/alpha/elf.h b/gcc-4.9/gcc/config/alpha/elf.h new file mode 100644 index 000000000..5a6803aba --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/elf.h @@ -0,0 +1,168 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha w/ELF. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +#undef ASM_SPEC +#define ASM_SPEC "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug}" + +/* Do not output a .file directive at the beginning of the input file. */ + +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ +do { \ + if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \ +} while (0) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ +} while (0) + +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#undef SBSS_SECTION_ASM_OP +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" +#undef SDATA_SECTION_ASM_OP +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" + +/* This is how we tell the assembler that two symbols have the same value. */ + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ + do { \ + assemble_name(FILE, ALIAS); \ + fputs(" = ", FILE); \ + assemble_name(FILE, NAME); \ + fputc('\n', FILE); \ + } while (0) + +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + { \ + fputc ('$', FILE); \ + assemble_name (FILE, alias); \ + fputs ("..ng = $", FILE); \ + assemble_name (FILE, name); \ + fputs ("..ng\n", FILE); \ + } \ + ASM_OUTPUT_DEF (FILE, alias, name); \ + } while (0) + +/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the + (even more) magical crtbegin.o file which provides part of the + support for getting C++ file-scope static object constructed + before entering `main'. 
*/ + +#undef STARTFILE_SPEC +#ifdef HAVE_LD_PIE +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#endif + +/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the + magical crtend.o file which provides part of the support for + getting C++ file-scope static object constructed before entering + `main', followed by a normal ELF "finalizer" file, `crtn.o'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Since application size is already constrained to <2GB by the form of + the ldgp relocation, we can use a 32-bit pc-relative relocation to + static data. Dynamic data is accessed indirectly to allow for read + only EH sections. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) + +/* If defined, a C statement to be executed just prior to the output of + assembler code for INSN. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + (alpha_this_literal_sequence_number = 0, \ + alpha_this_gpdisp_sequence_number = 0) +extern int alpha_this_literal_sequence_number; +extern int alpha_this_gpdisp_sequence_number; + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. Further, the .init/.fini section can + easily be more than 4MB away from the function to call so we can't + use bsr. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" br $29,1f\n" \ +"1: ldgp $29,0($29)\n" \ +" unop\n" \ +" jsr $26," USER_LABEL_PREFIX #FUNC "\n" \ +" .align 3\n" \ +" .previous"); + +/* If we have the capability create headers for efficient EH lookup. + As of Jan 2002, only glibc 2.2.4 can actually make use of this, but + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ +#if defined(HAVE_LD_EH_FRAME_HDR) +#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " +#endif diff --git a/gcc-4.9/gcc/config/alpha/elf.opt b/gcc-4.9/gcc/config/alpha/elf.opt new file mode 100644 index 000000000..680379033 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/elf.opt @@ -0,0 +1,29 @@ +; Alpha ELF options. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
+ +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +relax +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc-4.9/gcc/config/alpha/ev4.md b/gcc-4.9/gcc/config/alpha/ev4.md new file mode 100644 index 000000000..89d6c98e3 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev4.md @@ -0,0 +1,161 @@ +;; Scheduling description for Alpha EV4. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; On EV4 there are two classes of resources to consider: resources needed +; to issue, and resources needed to execute. IBUS[01] are in the first +; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second. +; (There are a few other register-like resources, but ...) + +(define_automaton "ev4_0,ev4_1,ev4_2") +(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0") +(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1") +(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2") +(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev4_multi" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "multi")) + "ev4_ib0+ev4_ib1") + +; Loads from L0 completes in three cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev4_ld" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ild,fld,ldsym,ld_l")) + "ev4_ib01+ev4_abox") + +; Stores can issue before the data (but not address) is ready. +(define_insn_reservation "ev4_ist" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ist")) + "ev4_ib1+ev4_abox") + +; ??? Separate from ev4_ist because store_data_bypass_p can't handle +; the patterns with multiple sets, like store-conditional. +(define_insn_reservation "ev4_ist_c" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "st_c")) + "ev4_ib1+ev4_abox") + +(define_insn_reservation "ev4_fst" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fst")) + "ev4_ib0+ev4_abox") + +; Memory barrier blocks ABOX insns until it's acknowledged by the external +; memory bus. This may be *quite* slow. Setting this to 4 cycles gets +; about all the benefit without making the DFA too large. +(define_insn_reservation "ev4_mb" 4 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "mb")) + "ev4_ib1+ev4_abox,ev4_abox*3") + +; Branches have no delay cost, but do tie up the unit for two cycles. 
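+;
+; In the reservation strings used here, '+' claims units in the same cycle,
+; ',' advances to the next cycle, '|' picks either unit, and '*N' repeats a
+; unit for N consecutive cycles.  So "ev4_ib1+ev4_bbox,ev4_bbox" below
+; reads: issue through IB1 and the branch box this cycle, then keep the
+; branch box busy for one more cycle, which is how the two-cycle tie-up
+; just described is encoded.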
+(define_insn_reservation "ev4_ibr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ibr,jsr")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_callpal" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "callpal")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_fbr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fbr")) + "ev4_ib0+ev4_bbox,ev4_bbox") + +; Arithmetic insns are normally have their results available after +; two cycles. There are a number of exceptions. + +(define_insn_reservation "ev4_iaddlog" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "iadd,ilog")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 + "ev4_iaddlog" + "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi") + +(define_insn_reservation "ev4_shiftcm" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "shift,icmov")) + "ev4_ib0+ev4_ebox") + +(define_insn_reservation "ev4_icmp" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "icmp")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 "ev4_icmp" "ev4_ibr") + +(define_bypass 0 + "ev4_iaddlog,ev4_shiftcm,ev4_icmp" + "ev4_ist" + "store_data_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can +; be issued exactly three cycles before an integer multiply completes". + +(define_insn_reservation "ev4_imulsi" 21 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox") + +(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p") + +(define_insn_reservation "ev4_imuldi" 23 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "!si"))) + "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox") + +(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p") + +; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in. +(define_insn_reservation "ev4_fpop" 6 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fadd,fmul,fcpys,fcmov")) + "ev4_ib1+ev4_fbox") + +(define_bypass 4 "ev4_fpop" "ev4_fpop") + +; The floating point divider is not pipelined. Also, "no FPOP insn can be +; issued exactly five or exactly six cycles before an fdiv insn completes". + +(define_insn_reservation "ev4_fdivsf" 34 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox") + +(define_insn_reservation "ev4_fdivdf" 63 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox") + +; Traps don't consume or produce data. +(define_insn_reservation "ev4_misc" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "misc")) + "ev4_ib1") diff --git a/gcc-4.9/gcc/config/alpha/ev5.md b/gcc-4.9/gcc/config/alpha/ev5.md new file mode 100644 index 000000000..9d1871ea9 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev5.md @@ -0,0 +1,194 @@ +;; Scheduling description for Alpha EV5. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; EV5 has two asymmetric integer units, E0 and E1, plus separate +;; FP add and multiply units. + +(define_automaton "ev5_0,ev5_1") +(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0") +(define_reservation "ev5_e01" "ev5_e0|ev5_e1") +(define_reservation "ev5_fam" "ev5_fa|ev5_fm") +(define_cpu_unit "ev5_imul" "ev5_0") +(define_cpu_unit "ev5_fdiv" "ev5_1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev5_multi" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "multi")) + "ev5_e0+ev5_e1+ev5_fa+ev5_fm") + +; Stores can only issue to E0, and may not issue with loads. +; Model this with some fake units. + +(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0") +(define_reservation "ev5_ld" "ev5_l0|ev5_l1") +(exclusion_set "ev5_l0,ev5_l1" "ev5_st") + +(define_insn_reservation "ev5_st" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ist,fst,st_c,mb")) + "ev5_e0+ev5_st") + +; Loads from L0 complete in two cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev5_ld" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ild,fld,ldsym")) + "ev5_e01+ev5_ld") + +(define_insn_reservation "ev5_ld_l" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ld_l")) + "ev5_e0+ev5_ld") + +; Integer branches slot only to E1. +(define_insn_reservation "ev5_ibr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ibr")) + "ev5_e1") + +(define_insn_reservation "ev5_callpal" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "callpal")) + "ev5_e1") + +(define_insn_reservation "ev5_jsr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "jsr")) + "ev5_e1") + +(define_insn_reservation "ev5_shift" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "shift")) + "ev5_e0") + +(define_insn_reservation "ev5_mvi" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "mvi")) + "ev5_e0") + +(define_insn_reservation "ev5_cmov" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "icmov")) + "ev5_e01") + +(define_insn_reservation "ev5_iadd" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "iadd")) + "ev5_e01") + +(define_insn_reservation "ev5_ilogcmp" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ilog,icmp")) + "ev5_e01") + +; Conditional move and branch can issue the same cycle as the test. +(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued +; to E0 exactly two cycles before an integer multiply completes". + +(define_insn_reservation "ev5_imull" 8 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulq" 12 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "di"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulh" 14 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "udi"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0") + +; The multiplier is unable to receive data from Ebox bypass paths. The +; instruction issues at the expected time, but its latency is increased +; by the time it takes for the input data to become available to the +; multiplier. For example, an IMULL instruction issued one cycle later +; than an ADDL instruction, which produced one of its operands, has a +; latency of 10 (8 + 2). 
If the IMULL instruction is issued two cycles +; later than the ADDL instruction, the latency is 9 (8 + 1). +; +; Model this instead with increased latency on the input instruction. + +(define_bypass 3 + "ev5_ld,ev5_ld_l,ev5_shift,ev5_mvi,ev5_cmov,ev5_iadd,ev5_ilogcmp" + "ev5_imull,ev5_imulq,ev5_imulh") + +(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh") + +; Similarly for the FPU we have two asymmetric units. + +(define_insn_reservation "ev5_fadd" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fadd,fcmov")) + "ev5_fa") + +(define_insn_reservation "ev5_fbr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fbr")) + "ev5_fa") + +(define_insn_reservation "ev5_fcpys" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fcpys")) + "ev5_fam") + +(define_insn_reservation "ev5_fmul" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fmul")) + "ev5_fm") + +; The floating point divider is not pipelined. Also, "no insn can be issued +; to FA exactly five before an fdiv insn completes". +; +; ??? Do not model this late reservation due to the enormously increased +; size of the resulting DFA. +; +; ??? Putting ev5_fa and ev5_fdiv alone into the same automata produces +; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate +; automata produces incorrect results for insns that can choose one or +; the other, i.e. ev5_fcpys. + +(define_insn_reservation "ev5_fdivsf" 15 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*14") + +(define_insn_reservation "ev5_fdivdf" 22 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*21") + +; Traps don't consume or produce data; rpcc is latency 2 if we ever add it. +(define_insn_reservation "ev5_misc" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "misc")) + "ev5_e0") diff --git a/gcc-4.9/gcc/config/alpha/ev6.md b/gcc-4.9/gcc/config/alpha/ev6.md new file mode 100644 index 000000000..e0612a411 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev6.md @@ -0,0 +1,181 @@ +;; Scheduling description for Alpha EV6. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; EV6 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. 
Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. + +(define_automaton "ev6_0,ev6_1") +(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0") +(define_reservation "ev6_u" "ev6_u0|ev6_u1") +(define_reservation "ev6_l" "ev6_l0|ev6_l1") +(define_reservation "ev6_ebox" "ev6_u|ev6_l") + +(define_cpu_unit "ev6_fa" "ev6_1") +(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0") +(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev6_multi" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "multi")) + "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev6_ild" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ild,ldsym,ld_l")) + "ev6_l") + +(define_insn_reservation "ev6_ist" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ist,st_c")) + "ev6_l") + +(define_insn_reservation "ev6_mb" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mb")) + "ev6_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "ev6_fld" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fld")) + "ev6_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "ev6_fst" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fst")) + "ev6_fst,nothing,ev6_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "ev6_arith" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "iadd,ilog,icmp")) + "ev6_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "ev6_mvi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mvi")) + "ev6_u0") + +; Shifts issue to upper units. +(define_insn_reservation "ev6_shift" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "shift")) + "ev6_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "ev6_imul" 7 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "imul")) + "ev6_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles. +(define_insn_reservation "ev6_icmov" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "icmov")) + "ev6_ebox,ev6_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "ev6_ibr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ibr,callpal")) + "ev6_u") + +; Calls only issue to L0. +(define_insn_reservation "ev6_jsr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "jsr")) + "ev6_l0") + +; Ftoi/itof only issue to lower pipes. 
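+; These transfers come from the FIX extension (cf. the mfix option and
+; AMASK_FIX elsewhere in this patch); with -mfix, a plain conversion such
+; as the illustrative function below can move the value between the
+; integer and FP register files directly instead of bouncing it through
+; memory:
+;
+;   double to_double (long x) { return (double) x; }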
+(define_insn_reservation "ev6_itof" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "itof")) + "ev6_l") + +(define_insn_reservation "ev6_ftoi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ftoi")) + "ev6_fst,nothing,ev6_l") + +(define_insn_reservation "ev6_fmul" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fmul")) + "ev6_fm") + +(define_insn_reservation "ev6_fadd" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fadd,fcpys,fbr")) + "ev6_fa") + +(define_bypass 6 "ev6_fmul,ev6_fadd" "ev6_fst,ev6_ftoi") + +(define_insn_reservation "ev6_fcmov" 8 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fcmov")) + "ev6_fa,nothing*3,ev6_fa") + +(define_bypass 10 "ev6_fcmov" "ev6_fst,ev6_ftoi") + +(define_insn_reservation "ev6_fdivsf" 12 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev6_fa*9") + +(define_insn_reservation "ev6_fdivdf" 15 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev6_fa*12") + +(define_insn_reservation "ev6_sqrtsf" 18 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "ev6_fa*15") + +(define_insn_reservation "ev6_sqrtdf" 33 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "ev6_fa*30") diff --git a/gcc-4.9/gcc/config/alpha/freebsd.h b/gcc-4.9/gcc/config/alpha/freebsd.h new file mode 100644 index 000000000..9e52d33e4 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/freebsd.h @@ -0,0 +1,68 @@ +/* Definitions for DEC Alpha/AXP running FreeBSD using the ELF format + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } + +/* Provide a CPP_SPEC appropriate for FreeBSD/alpha -- dealing with + the GCC option `-posix'. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#define LINK_SPEC "%{G*} %{relax:-relax} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* alpha.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define TARGET_ELF 1 + +#undef HAS_INIT_SECTION + +/* Show that we need a GP when profiling. 
*/ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't default to pcc-struct-return, we want to retain compatibility with + older FreeBSD releases AND pcc-struct-return may not be reentrant. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 diff --git a/gcc-4.9/gcc/config/alpha/linux-elf.h b/gcc-4.9/gcc/config/alpha/linux-elf.h new file mode 100644 index 000000000..bdefe237f --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/linux-elf.h @@ -0,0 +1,53 @@ +/* Definitions of target machine for GNU compiler + for Alpha Linux-based GNU systems using ELF. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ +{ "elf_dynamic_linker", ELF_DYNAMIC_LINKER }, + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) + +#define ELF_DYNAMIC_LINKER GNU_USER_DYNAMIC_LINKER + +#define LINK_SPEC "-m elf64alpha %{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(elf_dynamic_linker)} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC \ +"%{pthread:-lpthread} %{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack diff --git a/gcc-4.9/gcc/config/alpha/linux.h b/gcc-4.9/gcc/config/alpha/linux.h new file mode 100644 index 000000000..966e9b2d0 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/linux.h @@ -0,0 +1,102 @@ +/* Definitions of target machine for GNU compiler, + for Alpha Linux-based GNU systems. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("_LONGLONG"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + /* The GNU C++ standard library requires this. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } while (0) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared: %{profile:-lc_p}%{!profile:-lc}}" + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't care about faults in the prologue. */ +#undef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 1 + +/* OS fixes up EV5 data fault on prefetch. */ +#undef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 1 + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#define TARGET_POSIX_IO + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* -mcpu=native handling only makes sense with compiler running on + an Alpha chip. */ +#if defined(__alpha__) || defined(__alpha) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{mcpu=native:%. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } while (0) + + +/* NetBSD doesn't use the LANGUAGE* built-ins. */ +#undef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing */ + + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + + +/* Provide a CPP_SPEC appropriate for NetBSD/alpha. We use + this to pull in CPP specs that all NetBSD configurations need. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \ + { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC }, + + +/* Provide a LINK_SPEC appropriate for a NetBSD/alpha ELF target. */ + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + + +/* Provide an ENDFILE_SPEC appropriate for NetBSD/alpha ELF. Here we + add crtend.o, which provides part of the support for getting + C++ file-scope static objects deconstructed after exiting "main". + + We also need to handle the GCC option `-ffast-math'. 
*/ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ + %(netbsd_endfile_spec)" + +#define HAVE_ENABLE_EXECUTE_STACK diff --git a/gcc-4.9/gcc/config/alpha/openbsd.h b/gcc-4.9/gcc/config/alpha/openbsd.h new file mode 100644 index 000000000..74f16e134 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/openbsd.h @@ -0,0 +1,45 @@ +/* Configuration file for an alpha OpenBSD target. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Controlling the compilation driver. */ + +/* run-time target specifications */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + OPENBSD_OS_CPP_BUILTINS_ELF(); \ + OPENBSD_OS_CPP_BUILTINS_LP64(); \ + } while (0) + +/* Layout of source language data types. */ + +/* This must agree with */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +#define LOCAL_LABEL_PREFIX "." diff --git a/gcc-4.9/gcc/config/alpha/predicates.md b/gcc-4.9/gcc/config/alpha/predicates.md new file mode 100644 index 000000000..c68e83a70 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/predicates.md @@ -0,0 +1,653 @@ +;; Predicate definitions for DEC Alpha. +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or +;; any register. +(define_predicate "reg_or_6bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is an 8-bit constant. +(define_predicate "cint8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) + +;; Return 1 if OP is an 8-bit constant or any register. 
+(define_predicate "reg_or_8bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant or any register. +(define_predicate "reg_or_cint_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +;; Return 1 if the operand is a valid second operand to an add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a valid second operand to a +;; sign-extending add insn. +(define_predicate "sext_add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a non-symbolic constant operand that +;; does not satisfy add_operand. +(define_predicate "non_add_const_operand" + (and (match_code "const_int,const_double,const_vector") + (not (match_operand 0 "add_operand")))) + +;; Return 1 if the operand is a non-symbolic, nonzero constant operand. +(define_predicate "non_zero_const_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op != CONST0_RTX (mode)"))) + +;; Return 1 if OP is the constant 4 or 8. +(define_predicate "const48_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) + +;; Return 1 if OP is a valid first operand to an AND insn. +(define_predicate "and_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 + || zap_mask (INTVAL (op))") + (if_then_else (match_code "const_double") + (match_test "GET_MODE (op) == VOIDmode + && zap_mask (CONST_DOUBLE_LOW (op)) + && zap_mask (CONST_DOUBLE_HIGH (op))") + (match_operand 0 "register_operand")))) + +;; Return 1 if OP is a valid first operand to an IOR or XOR insn. +(define_predicate "or_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant that is the width, in bits, of an integral +;; mode not larger than DImode. +(define_predicate "mode_width_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 8 || i == 16 || i == 32 || i == 64; +}) + +;; Return 1 if OP is a constant that is a mask of ones of width of an +;; integral machine mode not larger than DImode. +(define_predicate "mode_mask_operand" + (match_code "const_int,const_double") +{ + if (CONST_INT_P (op)) + { + HOST_WIDE_INT value = INTVAL (op); + + if (value == 0xff) + return 1; + if (value == 0xffff) + return 1; + if (value == 0xffffffff) + return 1; + if (value == -1) + return 1; + } + else if (HOST_BITS_PER_WIDE_INT == 32 && GET_CODE (op) == CONST_DOUBLE) + { + if (CONST_DOUBLE_LOW (op) == 0xffffffff && CONST_DOUBLE_HIGH (op) == 0) + return 1; + } + return 0; +}) + +;; Return 1 if OP is a multiple of 8 less than 64. +(define_predicate "mul8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = INTVAL (op); + return i < 64 && i % 8 == 0; +}) + +;; Return 1 if OP is a hard floating-point register. 
+(define_predicate "hard_fp_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; +}) + +;; Return 1 if OP is a hard general register. +(define_predicate "hard_int_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; +}) + +;; Return 1 if OP is something that can be reloaded into a register; +;; if it is a MEM, it need not be valid. +(define_predicate "some_operand" + (ior (match_code "reg,mem,const_int,const_double,const_vector, + label_ref,symbol_ref,const,high") + (and (match_code "subreg") + (match_test "some_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Likewise, but don't accept constants. +(define_predicate "some_ni_operand" + (ior (match_code "reg,mem") + (and (match_code "subreg") + (match_test "some_ni_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Return 1 if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_code "label_ref,symbol_ref,const,high,reg,subreg,mem, + const_double,const_vector,const_int") +{ + switch (GET_CODE (op)) + { + case LABEL_REF: + case SYMBOL_REF: + case CONST: + if (TARGET_EXPLICIT_RELOCS) + { + /* We don't split symbolic operands into something unintelligable + until after reload, but we do not wish non-small, non-global + symbolic operands to be reconstructed from their high/lo_sum + form. */ + return (small_symbolic_operand (op, mode) + || global_symbolic_operand (op, mode) + || gotdtp_symbolic_operand (op, mode) + || gottp_symbolic_operand (op, mode)); + } + /* VMS still has a 32-bit mode. */ + return mode == ptr_mode || mode == Pmode; + + case HIGH: + return (TARGET_EXPLICIT_RELOCS + && local_symbolic_operand (XEXP (op, 0), mode)); + + case REG: + return 1; + + case SUBREG: + if (register_operand (op, mode)) + return 1; + /* ... fall through ... */ + case MEM: + return ((TARGET_BWX || (mode != HImode && mode != QImode)) + && general_operand (op, mode)); + + case CONST_DOUBLE: + return op == CONST0_RTX (mode); + + case CONST_VECTOR: + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (mode, op); + return op == CONST0_RTX (mode); + + case CONST_INT: + if (mode == QImode || mode == HImode) + return true; + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (mode, op); + return add_operand (op, mode); + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return 1 if OP is a SYMBOL_REF for a function known to be in this +;; file, and in the same section as the current function. + +(define_predicate "samegp_function_operand" + (match_code "symbol_ref") +{ + /* Easy test for recursion. */ + if (op == XEXP (DECL_RTL (current_function_decl), 0)) + return true; + + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (! SYMBOL_REF_LOCAL_P (op)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT, + and thus must share the same gp. */ + return ! 
SYMBOL_REF_EXTERNAL_P (op); +}) + +;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. +(define_predicate "direct_call_operand" + (match_operand 0 "samegp_function_operand") +{ + /* If profiling is implemented via linker tricks, we can't jump + to the nogp alternate entry point. Note that crtl->profile + would not be correct, since that doesn't indicate if the target + function uses profiling. */ + /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, + but is approximately correct for the OSF ABIs. Don't know + what to do for VMS, NT, or UMK. */ + if (!TARGET_PROFILING_NEEDS_GP && profile_flag) + return false; + + /* Must be a function. In some cases folks create thunks in static + data structures and then make calls to them. If we allow the + direct call, we'll get an error from the linker about !samegp reloc + against a symbol without a .prologue directive. */ + if (!SYMBOL_REF_FUNCTION_P (op)) + return false; + + /* Must be "near" so that the branch is assumed to reach. With + -msmall-text, this is assumed true of all local symbols. Since + we've already checked samegp, locality is already assured. */ + if (TARGET_SMALL_TEXT) + return true; + + return false; +}) + +;; Return 1 if OP is a valid operand for the MEM of a CALL insn. +;; +;; For TARGET_ABI_OSF, we want to restrict to R27 or a pseudo. + +(define_predicate "call_operand" + (ior (match_code "symbol_ref") + (and (match_code "reg") + (ior (match_test "!TARGET_ABI_OSF") + (match_test "!HARD_REGISTER_P (op)") + (match_test "REGNO (op) == R27_REG"))))) + +;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing +;; a (non-tls) variable known to be defined in this file. +(define_predicate "local_symbolic_operand" + (match_code "label_ref,const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return (SYMBOL_REF_LOCAL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; known to be defined in this file in the small data area. +(define_predicate "small_symbolic_operand" + (match_code "const,symbol_ref") +{ + HOST_WIDE_INT ofs = 0, max_ofs = 0; + + if (! TARGET_SMALL_DATA) + return false; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + { + ofs = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (GET_CODE (op) != SYMBOL_REF) + return false; + + /* ??? There's no encode_section_info equivalent for the rtl + constant pool, so SYMBOL_FLAG_SMALL never gets set. */ + if (CONSTANT_POOL_ADDRESS_P (op)) + { + max_ofs = GET_MODE_SIZE (get_pool_mode (op)); + if (max_ofs > g_switch_value) + return false; + } + else if (SYMBOL_REF_LOCAL_P (op) + && SYMBOL_REF_SMALL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)) + { + if (SYMBOL_REF_DECL (op)) + max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op))); + } + else + return false; + + /* Given that we know that the GP is always 8 byte aligned, we can + always adjust by 7 without overflowing. */ + if (max_ofs < 8) + max_ofs = 8; + + /* Since we know this is an object in a small data section, we know the + entire section is addressable via GP. We don't know where the section + boundaries are, but we know the entire object is within. 
*/ + return IN_RANGE (ofs, 0, max_ofs - 1); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; not known (or known not) to be defined in this file. +(define_predicate "global_symbolic_operand" + (match_code "const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op)) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op,0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op,0), 0)) == LABEL_REF) + && CONST_INT_P (XEXP (XEXP (op,0), 1))")))) + +;; Return true if OP is valid for 16-bit DTP relative relocations. +(define_predicate "dtp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 32-bit DTP relative relocations. +(define_predicate "dtp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 64-bit DTP relative relocations. +(define_predicate "gotdtp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 16-bit TP relative relocations. +(define_predicate "tp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 32-bit TP relative relocations. +(define_predicate "tp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 64-bit TP relative relocations. +(define_predicate "gottp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)"))) + +;; Return 1 if this memory address is a known aligned register plus +;; a constant. It must be a valid address. This means that we can do +;; this as an aligned reference plus some offset. +;; +;; Take into account what reload will do. Oh god this is awful. +;; The horrible comma-operator construct below is to prevent genrecog +;; from thinking that this predicate accepts REG and SUBREG. We don't +;; use recog during reload, so pretending these codes are accepted +;; pessimizes things a tad. + +(define_special_predicate "aligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 1; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! 
memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 0; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); +}) + +;; Similar, but return 1 if OP is a MEM which is not alignable. + +(define_special_predicate "unaligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 0; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 1; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); +}) + +;; Return 1 if OP is any memory location. During reload a pseudo matches. +(define_special_predicate "any_memory_operand" + (match_code "mem,reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (MEM_P (op)) + return true; + if (reload_in_progress && REG_P (op)) + { + unsigned regno = REGNO (op); + if (HARD_REGISTER_NUM_P (regno)) + return false; + else + return reg_renumber[regno] < 0; + } + + return false; +}) + +;; Return 1 is OP is a memory location that is not a reference +;; (using an AND) to an unaligned location. Take into account +;; what reload will do. +(define_special_predicate "normal_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) != AND")))) + +;; Returns 1 if OP is not an eliminable register. +;; +;; This exists to cure a pathological failure in the s8addq (et al) patterns, +;; +;; long foo () { long t; bar(); return (long) &t * 26107; } +;; +;; which run afoul of a hack in reload to cure a (presumably) similar +;; problem with lea-type instructions on other targets. But there is +;; one of us and many of them, so work around the problem by selectively +;; preventing combine from making the optimization. + +(define_predicate "reg_not_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return op != frame_pointer_rtx && op != arg_pointer_rtx; +}) + +;; Accept a register, but not a subreg of any kind. This allows us to +;; avoid pathological cases in reload wrt data movement common in +;; int->fp conversion. */ +(define_predicate "reg_no_subreg_operand" + (and (match_code "reg") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cbranch" +;; instructions. +(define_predicate "alpha_cbranch_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (match_code "ordered,unordered"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cmp" style +;; instructions. +(define_predicate "alpha_comparison_operator" + (match_code "eq,le,lt,leu,ltu")) + +;; Similarly, but with swapped operands. 
+(define_predicate "alpha_swapped_comparison_operator" + (match_code "eq,ge,gt,gtu")) + +;; Return 1 if OP is a valid Alpha comparison operator against zero +;; for "bcc" style instructions. +(define_predicate "alpha_zero_comparison_operator" + (match_code "eq,ne,le,lt,leu,ltu")) + +;; Return 1 if OP is a signed comparison operation. +(define_predicate "signed_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt")) + +;; Return 1 if OP is a valid Alpha floating point comparison operator. +(define_predicate "alpha_fp_comparison_operator" + (match_code "eq,le,lt,unordered")) + +;; Return 1 if this is a divide or modulus operator. +(define_predicate "divmod_operator" + (match_code "div,mod,udiv,umod")) + +;; Return 1 if this is a float->int conversion operator. +(define_predicate "fix_operator" + (match_code "fix,unsigned_fix")) + +;; Recognize an addition operation that includes a constant. Used to +;; convince reload to canonize (plus (plus reg c1) c2) during register +;; elimination. + +(define_predicate "addition_operation" + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), mode) + && satisfies_constraint_K (XEXP (op, 1))"))) + +;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a +;; small symbolic operand until after reload. At which point we need +;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) +;; so that sched2 has the proper dependency information. */ +(define_predicate "some_small_symbolic_operand" + (match_code "set,parallel,prefetch,unspec,unspec_volatile") +{ + /* Avoid search unless necessary. */ + if (!TARGET_EXPLICIT_RELOCS || !reload_completed) + return false; + return for_each_rtx (&op, some_small_symbolic_operand_int, NULL); +}) + +;; Accept a register, or a memory if BWX is enabled. +(define_predicate "reg_or_bwx_memory_operand" + (ior (match_operand 0 "register_operand") + (and (match_test "TARGET_BWX") + (match_operand 0 "memory_operand")))) + +;; Accept a memory whose address is only a register. +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) diff --git a/gcc-4.9/gcc/config/alpha/sync.md b/gcc-4.9/gcc/config/alpha/sync.md new file mode 100644 index 000000000..2145fdf2b --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/sync.md @@ -0,0 +1,343 @@ +;; GCC machine description for Alpha synchronization instructions. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "reg_or_8bit_operand") + (ior "or_operand") (xor "or_operand") (and "and_operand")]) +(define_code_attr fetchop_constr + [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "riNHM")]) + + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" + "mb" + [(set_attr "type" "mb")]) + +(define_insn "load_locked_" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (unspec_volatile:I48MODE + [(match_operand:I48MODE 1 "memory_operand" "m")] + UNSPECV_LL))] + "" + "ld_l %0,%1" + [(set_attr "type" "ld_l")]) + +(define_insn "store_conditional_" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_operand:I48MODE 1 "memory_operand" "=m") + (match_operand:I48MODE 2 "reg_or_0_operand" "0"))] + "" + "st_c %0,%1" + [(set_attr "type" "st_c")]) + +;; The Alpha Architecture Handbook says that it is UNPREDICTABLE whether +;; the lock is cleared by a normal load or store. This means we cannot +;; expand a ll/sc sequence before reload, lest a register spill is +;; inserted inside the sequence. It is also UNPREDICTABLE whether the +;; lock is cleared by a TAKEN branch. This means that we can not expand +;; a ll/sc sequence containing a branch (i.e. compare-and-swap) until after +;; the final basic-block reordering pass. 
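+;; For illustration: the split that alpha_split_compare_and_swap performs
+;; once "epilogue_completed" holds is expected to yield a load-locked /
+;; store-conditional loop roughly like the sketch below for a DImode
+;; compare-and-swap.  The register numbers and the exact barrier placement
+;; are assumptions made only for this sketch; the real barriers depend on
+;; the success/failure memory models passed to the expander.
+;;
+;;         mb                      ; pre-barrier, if the model requires it
+;; retry:  ldq_l   $1,0($16)       ; load-locked the current value
+;;         cmpeq   $1,$17,$0       ; does it match the expected value?
+;;         beq     $0,fail         ; no: fail without storing
+;;         mov     $18,$1          ; yes: try to store the desired value
+;;         stq_c   $1,0($16)       ; store-conditional; $1 = 1 on success
+;;         beq     $1,retry        ; reservation lost: retry (strong CAS)
+;;         mb                      ; post-barrier, if the model requires it
+;; fail:
+;;
+;; No spill, reload, or unrelated memory access may land between the ldq_l
+;; and the stq_c, and the sequence contains branches, which is why the
+;; patterns below emit "#" and only split at "epilogue_completed", after
+;; the final basic-block reordering pass described above.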
+ +(define_expand "atomic_compare_and_swap" + [(parallel + [(set (match_operand:DI 0 "register_operand") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:I48MODE 3 "reg_or_8bit_operand") ;; expected + (match_operand:I48MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG))])] + "" +{ + if (mode == SImode) + { + operands[3] = convert_modes (DImode, SImode, operands[3], 0); + operands[4] = convert_modes (DImode, SImode, operands[4], 0); + } +}) + +(define_insn_and_split "*atomic_compare_and_swap" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "add_operand" "rKL") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:DI 0 "register_operand") ;; bool out + (match_operand:I12MODE 1 "register_operand") ;; val out + (match_operand:I12MODE 2 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 3 "register_operand") ;; expected + (match_operand:I12MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + "" +{ + alpha_expand_compare_and_swap_12 (operands); + DONE; +}) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "reg_or_0_operand" "rJ") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 9 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") ;; output + (match_operand:I48MODE 1 "memory_operand" "+m")) ;; memory + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "add_operand" "rKL") ;; input + 
(match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_exchange (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_exchange" + [(match_operand:I12MODE 0 "register_operand") ;; output + (match_operand:I12MODE 1 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 2 "reg_or_0_operand") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + "" +{ + alpha_expand_atomic_exchange_12 (operands); + DONE; +}) + +(define_insn_and_split "atomic_exchange_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; output + (zero_extend:DI + (match_operand:I12MODE 1 "mem_noofs_operand" "+w"))) ;; memory + (set (match_dup 1) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") ;; input + (match_operand:DI 3 "register_operand" "r") ;; align + (match_operand:SI 4 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:DI 5 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_exchange_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "" "")) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "register_operand" "r"))) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "" "")) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "register_operand" "r"))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + 
(FETCHOP:I48MODE + (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "" ""))) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) (match_dup 2)) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (not:I48MODE + (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "register_operand" "r")))) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) diff --git a/gcc-4.9/gcc/config/alpha/t-linux b/gcc-4.9/gcc/config/alpha/t-linux new file mode 100644 index 000000000..1b4a26f74 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/t-linux @@ -0,0 +1 @@ +MULTIARCH_DIRNAME = $(call if_multiarch,alpha-linux-gnu) diff --git a/gcc-4.9/gcc/config/alpha/t-vms b/gcc-4.9/gcc/config/alpha/t-vms new file mode 100644 index 000000000..12a702125 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/t-vms @@ -0,0 +1,21 @@ +# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mcpu=ev6 +MULTILIB_DIRNAMES = ev6 +MULTILIB_OSDIRNAMES = ev6 diff --git a/gcc-4.9/gcc/config/alpha/vms.h b/gcc-4.9/gcc/config/alpha/vms.h new file mode 100644 index 000000000..b2977784b --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/vms.h @@ -0,0 +1,306 @@ +/* Output variables, constants and external declarations, for GNU compiler. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Alpha/VMS object format is not really Elf, but this makes compiling + crtstuff.c and dealing with shared library initialization much easier. 
*/ +#define OBJECT_FORMAT_ELF + +/* Do not use TM clone registry as it currently doesn't work. Alpha/VMS + object is too far from ELF for supporting TM out of the box. */ +#define USE_TM_CLONE_REGISTRY 0 + +/* This enables certain macros in alpha.h, which will make an indirect + reference to an external symbol an invalid address. This needs to be + defined before we include alpha.h, since it determines which macros + are used for GO_IF_*. */ + +#define NO_EXTERNAL_INDIRECT_ADDRESS + +#define SUBTARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__ALPHA"); \ + if (TARGET_FLOAT_VAX) \ + builtin_define ("__G_FLOAT"); \ + else \ + builtin_define ("__IEEE_FLOAT"); \ + } while (0) + +#undef PCC_STATIC_STRUCT_RETURN + +#define MAX_OFILE_ALIGNMENT 524288 /* 8 x 2^16 by DEC Ada Test CD40VRA */ + +/* The maximum alignment 'malloc' honors. */ +#undef MALLOC_ABI_ALIGNMENT +#define MALLOC_ABI_ALIGNMENT \ + ((flag_vms_malloc64 && flag_vms_pointer_size != VMS_POINTER_SIZE_NONE \ + ? 16 : 8) * BITS_PER_UNIT) + +#undef FIXED_REGISTERS +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +#undef CALL_USED_REGISTERS +#define CALL_USED_REGISTERS \ + {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. + + We allocate in the following order: + $f1 (nonsaved floating-point register) + $f10-$f15 (likewise) + $f22-$f30 (likewise) + $f21-$f16 (likewise, but input args) + $f0 (nonsaved, but return value) + $f2-$f9 (saved floating-point registers) + $1 (nonsaved integer registers) + $22-$25 (likewise) + $28 (likewise) + $0 (likewise, but return value) + $21-$16 (likewise, but input args) + $27 (procedure value in OSF, nonsaved in NT) + $2-$8 (saved integer registers) + $9-$14 (saved integer registers) + $26 (return PC) + $15 (frame pointer) + $29 (global pointer) + $30, $31, $f31 (stack pointer and always zero/ap & fp) */ + +#undef REG_ALLOC_ORDER +#define REG_ALLOC_ORDER \ + {33, \ + 42, 43, 44, 45, 46, 47, \ + 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 53, 52, 51, 50, 49, 48, \ + 32, \ + 34, 35, 36, 37, 38, 39, 40, 41, \ + 1, \ + 22, 23, 24, 25, \ + 28, \ + 0, \ + 21, 20, 19, 18, 17, 16, \ + 27, \ + 2, 3, 4, 5, 6, 7, 8, \ + 9, 10, 11, 12, 13, 14, \ + 26, \ + 15, \ + 29, \ + 30, 31, 63 } + +#undef HARD_FRAME_POINTER_REGNUM +#define HARD_FRAME_POINTER_REGNUM 29 + +/* Define registers used by the epilogue and return instruction. */ +#undef EPILOGUE_USES +#define EPILOGUE_USES(REGNO) ((REGNO) == 26 || (REGNO) == 29) + +#undef INITIAL_ELIMINATION_OFFSET +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_vms_initial_elimination_offset(FROM, TO)) + + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha/VMS, this is a structure that contains the number of + arguments and, for each argument, the datatype of that argument. 
+ + The number of arguments is a number of words of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +enum avms_arg_type {I64, FF, FD, FG, FS, FT}; +typedef struct {int num_args; enum avms_arg_type atypes[6];} avms_arg_info; + +#undef CUMULATIVE_ARGS +#define CUMULATIVE_ARGS avms_arg_info + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#undef INIT_CUMULATIVE_ARGS +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM).num_args = 0; \ + (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \ + (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64; + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Even though pointers are 64bits, only 32bit ever remain significant in code + addresses. */ +#define MASK_RETURN_ADDR \ + (flag_vms_pointer_size == VMS_POINTER_SIZE_NONE \ + ? constm1_rtx \ + : GEN_INT (0xffffffff)) + +#undef ASM_WEAKEN_LABEL +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + +#define READONLY_DATA_SECTION_ASM_OP "\t.rdata" +#define CTORS_SECTION_ASM_OP "\t.ctors" +#define DTORS_SECTION_ASM_OP "\t.dtors" +#define SDATA_SECTION_ASM_OP "\t.sdata" +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t.long " #FUNC"\n"); + +#undef ASM_OUTPUT_ADDR_DIFF_ELT + +#undef ASM_OUTPUT_ADDR_VEC_ELT +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.quad $L%d\n", (VALUE)) + +#undef CASE_VECTOR_MODE +#define CASE_VECTOR_MODE DImode +#undef CASE_VECTOR_PC_RELATIVE + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \ +{ ASM_OUTPUT_ALIGN (FILE, 3); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); } + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + vms_output_aligned_decl_common (FILE, DECL, NAME, SIZE, ALIGN) + +/* Control how constructors and destructors are emitted. */ +#define TARGET_ASM_CONSTRUCTOR vms_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR vms_asm_out_destructor + +#define DWARF2_DEBUGGING_INFO 1 +#define VMS_DEBUGGING_INFO 1 + +#define DWARF2_UNWIND_INFO 1 + +#undef EH_RETURN_HANDLER_RTX +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, 8)) + +#define LINK_EH_SPEC "vms-dwarf2eh.o%s " +#define LINK_GCC_C_SEQUENCE_SPEC "%G" + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This is how to advance the location counter by SIZE bytes. */ + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))) + +/* Switch into a generic section. 
*/ +#define TARGET_ASM_NAMED_SECTION vms_asm_named_section + +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ + do \ + { \ + fprintf ((FILE), "\t"); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, " = "); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } \ + while (0) + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE VMS_AND_DWARF2_DEBUG + +#define ASM_PN_FORMAT "%s___%lu" + +/* ??? VMS uses different linkage. */ +#undef TARGET_ASM_OUTPUT_MI_THUNK + +#undef ASM_SPEC +#undef ASM_FINAL_SPEC + +/* The VMS convention is to always provide minimal debug info + for a traceback unless specifically overridden. + + Because ASM_OUTPUT_ADDR_DIFF_ELT is not defined for alpha-vms, + jump tables cannot be output for PIC code, because you can't put + an absolute address in a readonly section. Putting the table in + a writable section is a security hole. Therefore, we unset the + flag_jump_tables flag, forcing switch statements to be expanded + using decision trees. There are probably other ways to address + this issue, but using a decision tree is clearly safe. */ + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (write_symbols == NO_DEBUG \ + && debug_info_level == DINFO_LEVEL_NONE) \ + { \ + write_symbols = VMS_DEBUG; \ + debug_info_level = DINFO_LEVEL_TERSE; \ + } \ + if (flag_pic) \ + flag_jump_tables = 0; \ +} while (0) + +#undef LINK_SPEC +#if HAVE_GNU_LD +/* GNU-ld built-in linker script already handles the dwarf2 debug sections. */ +#define LINK_SPEC "%{shared} %{v}" +#else +/* Link with vms-dwarf2.o if -g (except -g0). This causes the + VMS link to pull all the dwarf2 debug sections together. */ +#define LINK_SPEC "%{g0} %{g*:-g vms-dwarf2.o%s} %{shared} %{v} %{map}" +#endif + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:crt0.o%s crtbegin.o%s} \ + %{!static:%{shared:crtbeginS.o%s}}" + +#define ENDFILE_SPEC "%{!shared:crtend.o%s} %{!static:%{shared:crtendS.o%s}}" + +#define INIT_SECTION_ASM_OP "\t.section LIB$INITIALIZE,GBL,NOWRT" + +#define LONGLONG_STANDALONE 1 + +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE vms_valid_pointer_mode + +/* Default values for _CRTL_VER and _VMS_VER. */ +#define VMS_DEFAULT_CRTL_VER 70320000 +#define VMS_DEFAULT_VMS_VER 70320000 diff --git a/gcc-4.9/gcc/config/alpha/x-alpha b/gcc-4.9/gcc/config/alpha/x-alpha new file mode 100644 index 000000000..2b22e5e32 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/x-alpha @@ -0,0 +1,3 @@ +driver-alpha.o: $(srcdir)/config/alpha/driver-alpha.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc-4.9/gcc/config/arc/arc-modes.def b/gcc-4.9/gcc/config/arc/arc-modes.def new file mode 100644 index 000000000..f279e3c72 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-modes.def @@ -0,0 +1,37 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Some insns set all condition code flags, some only set the ZNC flags, and + some only set the ZN flags. */ + +CC_MODE (CC_ZN); +CC_MODE (CC_Z); +CC_MODE (CC_C); +CC_MODE (CC_FP_GT); +CC_MODE (CC_FP_GE); +CC_MODE (CC_FP_ORD); +CC_MODE (CC_FP_UNEQ); +CC_MODE (CC_FPX); + +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ diff --git a/gcc-4.9/gcc/config/arc/arc-opts.h b/gcc-4.9/gcc/config/arc/arc-opts.h new file mode 100644 index 000000000..f2f1bc72b --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-opts.h @@ -0,0 +1,28 @@ +/* GCC option-handling definitions for the Synopsys DesignWare ARC architecture. + + Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +enum processor_type +{ + PROCESSOR_NONE, + PROCESSOR_A5, + PROCESSOR_ARC600, + PROCESSOR_ARC601, + PROCESSOR_ARC700 +}; diff --git a/gcc-4.9/gcc/config/arc/arc-protos.h b/gcc-4.9/gcc/config/arc/arc-protos.h new file mode 100644 index 000000000..dd54fa8bd --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-protos.h @@ -0,0 +1,118 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifdef RTX_CODE + +extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx); + +/* Define the function that build the compare insn for scc, bcc and mov*cc. */ +extern struct rtx_def *gen_compare_reg (rtx, enum machine_mode); + +/* Declarations for various fns used in the .md file. 
*/ +extern void arc_output_function_epilogue (FILE *, HOST_WIDE_INT, int); +extern const char *output_shift (rtx *); +extern bool compact_sda_memory_operand (rtx op,enum machine_mode mode); +extern bool arc_double_limm_p (rtx); +extern void arc_print_operand (FILE *, rtx, int); +extern void arc_print_operand_address (FILE *, rtx); +extern void arc_final_prescan_insn (rtx, rtx *, int); +extern void arc_set_default_type_attributes(tree type); +extern const char *arc_output_libcall (const char *); +extern bool prepare_extend_operands (rtx *operands, enum rtx_code code, + enum machine_mode omode); +extern int arc_output_addsi (rtx *operands, bool, bool); +extern int arc_output_commutative_cond_exec (rtx *operands, bool); +extern bool arc_expand_movmem (rtx *operands); +extern bool prepare_move_operands (rtx *operands, enum machine_mode mode); +extern void emit_shift (enum rtx_code, rtx, rtx, rtx); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE +extern enum arc_function_type arc_compute_function_type (struct function *); +#endif /* TREE_CODE */ + + +extern void arc_init (void); +extern unsigned int arc_compute_frame_size (int); +extern bool arc_ccfsm_branch_deleted_p (void); +extern void arc_ccfsm_record_branch_deleted (void); + +extern rtx arc_legitimize_pic_address (rtx, rtx); +void arc_asm_output_aligned_decl_local (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); +extern rtx arc_return_addr_rtx (int , rtx); +extern bool check_if_valid_regno_const (rtx *, int); +extern bool check_if_valid_sleep_operand (rtx *, int); +extern bool arc_legitimate_constant_p (enum machine_mode, rtx); +extern bool arc_legitimate_pc_offset_p (rtx); +extern bool arc_legitimate_pic_addr_p (rtx); +extern void emit_pic_move (rtx *, enum machine_mode); +extern bool arc_raw_symbolic_reference_mentioned_p (rtx, bool); +extern bool arc_legitimate_pic_operand_p (rtx); +extern bool arc_is_longcall_p (rtx); +extern bool arc_is_shortcall_p (rtx); +extern bool arc_profile_call (rtx callee); +extern bool valid_brcc_with_delay_p (rtx *); +extern bool small_data_pattern (rtx , enum machine_mode); +extern rtx arc_rewrite_small_data (rtx); +extern bool arc_ccfsm_cond_exec_p (void); +struct secondary_reload_info; +extern int arc_register_move_cost (enum machine_mode, enum reg_class, + enum reg_class); +extern rtx disi_highpart (rtx); +extern int arc_adjust_insn_length (rtx, int, bool); +extern int arc_corereg_hazard (rtx, rtx); +extern int arc_hazard (rtx, rtx); +extern int arc_write_ext_corereg (rtx); +extern rtx gen_acc1 (void); +extern rtx gen_acc2 (void); +extern rtx gen_mlo (void); +extern rtx gen_mhi (void); +extern bool arc_branch_size_unknown_p (void); +struct arc_ccfsm; +extern void arc_ccfsm_record_condition (rtx, bool, rtx, struct arc_ccfsm *); +extern void arc_expand_prologue (void); +extern void arc_expand_epilogue (int); +extern void arc_init_expanders (void); +extern int arc_check_millicode (rtx op, int offset, int load_p); +extern int arc_get_unalign (void); +extern void arc_clear_unalign (void); +extern void arc_toggle_unalign (void); +extern void split_addsi (rtx *); +extern void split_subsi (rtx *); +extern void arc_pad_return (void); +extern rtx arc_split_move (rtx *); +extern int arc_verify_short (rtx insn, int unalign, int); +extern const char *arc_short_long (rtx insn, const char *, const char *); +extern rtx arc_regno_use_in (unsigned int, rtx); +extern int arc_attr_type (rtx); +extern bool arc_scheduling_not_expected (void); +extern bool arc_sets_cc_p 
(rtx insn); +extern int arc_label_align (rtx label); +extern bool arc_need_delay (rtx insn); +extern bool arc_text_label (rtx); +extern int arc_decl_pretend_args (tree decl); +extern bool arc_short_comparison_p (rtx, int); +extern bool arc_epilogue_uses (int regno); +/* insn-attrtab.c doesn't include reload.h, which declares regno_clobbered_p. */ +extern int regno_clobbered_p (unsigned int, rtx, enum machine_mode, int); +extern int arc_return_slot_offset (void); +extern bool arc_legitimize_reload_address (rtx *, enum machine_mode, int, int); diff --git a/gcc-4.9/gcc/config/arc/arc-simd.h b/gcc-4.9/gcc/config/arc/arc-simd.h new file mode 100644 index 000000000..768e35b95 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-simd.h @@ -0,0 +1,186 @@ +/* Synopsys DesignWare ARC SIMD include file. + Copyright (C) 2007-2014 Free Software Foundation, Inc. + Written by Saurabh Verma (saurabh.verma@celunite.com) on behalf os Synopsys + Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +#ifndef _ARC_SIMD_H +#define _ARC_SIMD_H 1 + +#ifndef __ARC_SIMD__ +#error Use the "-msimd" flag to enable ARC SIMD support +#endif + +/* I0-I7 registers. */ +#define _IREG_I0 0 +#define _IREG_I1 1 +#define _IREG_I2 2 +#define _IREG_I3 3 +#define _IREG_I4 4 +#define _IREG_I5 5 +#define _IREG_I6 6 +#define _IREG_I7 7 + +/* DMA configuration registers. */ +#define _DMA_REG_DR0 0 +#define _DMA_SDM_SRC_ADR_REG _DMA_REG_DR0 +#define _DMA_SDM_DEST_ADR_REG _DMA_REG_DR0 + +#define _DMA_REG_DR1 1 +#define _DMA_SDM_STRIDE_REG _DMA_REG_DR1 + +#define _DMA_REG_DR2 2 +#define _DMA_BLK_REG _DMA_REG_DR2 + +#define _DMA_REG_DR3 3 +#define _DMA_LOC_REG _DMA_REG_DR3 + +#define _DMA_REG_DR4 4 +#define _DMA_SYS_SRC_ADR_REG _DMA_REG_DR4 +#define _DMA_SYS_DEST_ADR_REG _DMA_REG_DR4 + +#define _DMA_REG_DR5 5 +#define _DMA_SYS_STRIDE_REG _DMA_REG_DR5 + +#define _DMA_REG_DR6 6 +#define _DMA_CFG_REG _DMA_REG_DR6 + +#define _DMA_REG_DR7 7 +#define _DMA_FT_BASE_ADR_REG _DMA_REG_DR7 + +/* Predefined types used in vector instructions. 
*/ +typedef int __v4si __attribute__((vector_size(16))); +typedef short __v8hi __attribute__((vector_size(16))); + +/* Synonyms */ +#define _vaddaw __builtin_arc_vaddaw +#define _vaddw __builtin_arc_vaddw +#define _vavb __builtin_arc_vavb +#define _vavrb __builtin_arc_vavrb +#define _vdifaw __builtin_arc_vdifaw +#define _vdifw __builtin_arc_vdifw +#define _vmaxaw __builtin_arc_vmaxaw +#define _vmaxw __builtin_arc_vmaxw +#define _vminaw __builtin_arc_vminaw +#define _vminw __builtin_arc_vminw +#define _vmulaw __builtin_arc_vmulaw +#define _vmulfaw __builtin_arc_vmulfaw +#define _vmulfw __builtin_arc_vmulfw +#define _vmulw __builtin_arc_vmulw +#define _vsubaw __builtin_arc_vsubaw +#define _vsubw __builtin_arc_vsubw +#define _vsummw __builtin_arc_vsummw +#define _vand __builtin_arc_vand +#define _vandaw __builtin_arc_vandaw +#define _vbic __builtin_arc_vbic +#define _vbicaw __builtin_arc_vbicaw +#define _vor __builtin_arc_vor +#define _vxor __builtin_arc_vxor +#define _vxoraw __builtin_arc_vxoraw +#define _veqw __builtin_arc_veqw +#define _vlew __builtin_arc_vlew +#define _vltw __builtin_arc_vltw +#define _vnew __builtin_arc_vnew +#define _vmr1aw __builtin_arc_vmr1aw +#define _vmr1w __builtin_arc_vmr1w +#define _vmr2aw __builtin_arc_vmr2aw +#define _vmr2w __builtin_arc_vmr2w +#define _vmr3aw __builtin_arc_vmr3aw +#define _vmr3w __builtin_arc_vmr3w +#define _vmr4aw __builtin_arc_vmr4aw +#define _vmr4w __builtin_arc_vmr4w +#define _vmr5aw __builtin_arc_vmr5aw +#define _vmr5w __builtin_arc_vmr5w +#define _vmr6aw __builtin_arc_vmr6aw +#define _vmr6w __builtin_arc_vmr6w +#define _vmr7aw __builtin_arc_vmr7aw +#define _vmr7w __builtin_arc_vmr7w +#define _vmrb __builtin_arc_vmrb +#define _vh264f __builtin_arc_vh264f +#define _vh264ft __builtin_arc_vh264ft +#define _vh264fw __builtin_arc_vh264fw +#define _vvc1f __builtin_arc_vvc1f +#define _vvc1ft __builtin_arc_vvc1ft +#define _vbaddw __builtin_arc_vbaddw +#define _vbmaxw __builtin_arc_vbmaxw +#define _vbminw __builtin_arc_vbminw +#define _vbmulaw __builtin_arc_vbmulaw +#define _vbmulfw __builtin_arc_vbmulfw +#define _vbmulw __builtin_arc_vbmulw +#define _vbrsubw __builtin_arc_vbrsubw +#define _vbsubw __builtin_arc_vbsubw +#define _vasrw __builtin_arc_vasrw +#define _vsr8 __builtin_arc_vsr8 +#define _vsr8aw __builtin_arc_vsr8aw +#define _vasrrwi __builtin_arc_vasrrwi +#define _vasrsrwi __builtin_arc_vasrsrwi +#define _vasrwi __builtin_arc_vasrwi +#define _vasrpwbi __builtin_arc_vasrpwbi +#define _vasrrpwbi __builtin_arc_vasrrpwbi +#define _vsr8awi __builtin_arc_vsr8awi +#define _vsr8i __builtin_arc_vsr8i +#define _vmvaw __builtin_arc_vmvaw +#define _vmvw __builtin_arc_vmvw +#define _vmvzw __builtin_arc_vmvzw +#define _vd6tapf __builtin_arc_vd6tapf +#define _vmovaw __builtin_arc_vmovaw +#define _vmovw __builtin_arc_vmovw +#define _vmovzw __builtin_arc_vmovzw +#define _vabsaw __builtin_arc_vabsaw +#define _vabsw __builtin_arc_vabsw +#define _vaddsuw __builtin_arc_vaddsuw +#define _vsignw __builtin_arc_vsignw +#define _vexch1 __builtin_arc_vexch1 +#define _vexch2 __builtin_arc_vexch2 +#define _vexch4 __builtin_arc_vexch4 +#define _vupbaw __builtin_arc_vupbaw +#define _vupbw __builtin_arc_vupbw +#define _vupsbaw __builtin_arc_vupsbaw +#define _vupsbw __builtin_arc_vupsbw +#define _vdirun __builtin_arc_vdirun +#define _vdorun __builtin_arc_vdorun +#define _vdiwr __builtin_arc_vdiwr +#define _vdowr __builtin_arc_vdowr +#define _vrec __builtin_arc_vrec +#define _vrun __builtin_arc_vrun +#define _vrecrun __builtin_arc_vrecrun +#define _vendrec 
__builtin_arc_vendrec +#define _vld32wh __builtin_arc_vld32wh +#define _vld32wl __builtin_arc_vld32wl +#define _vld64 __builtin_arc_vld64 +#define _vld32 __builtin_arc_vld32 +#define _vld64w __builtin_arc_vld64w +#define _vld128 __builtin_arc_vld128 +#define _vst128 __builtin_arc_vst128 +#define _vst64 __builtin_arc_vst64 +#define _vst16_n __builtin_arc_vst16_n +#define _vst32_n __builtin_arc_vst32_n +#define _vinti __builtin_arc_vinti + +/* Additional synonyms to ease programming. */ +#define _setup_dma_in_channel_reg _vdiwr +#define _setup_dma_out_channel_reg _vdowr + +#endif /* _ARC_SIMD_H */ diff --git a/gcc-4.9/gcc/config/arc/arc.c b/gcc-4.9/gcc/config/arc/arc.c new file mode 100644 index 000000000..0eaabede2 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.c @@ -0,0 +1,9360 @@ +/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + + Sources derived from work done by Sankhya Technologies (www.sankhya.com) on + behalf of Synopsys Inc. + + Position Independent Code support added,Code cleaned up, + Comments and Support For ARC700 instructions added by + Saurabh Verma (saurabh.verma@codito.com) + Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) + + Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines, + profiling support added by Joern Rennecke + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "function.h" +#include "toplev.h" +#include "ggc.h" +#include "tm_p.h" +#include "target.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "expr.h" +#include "recog.h" +#include "debug.h" +#include "diagnostic.h" +#include "insn-codes.h" +#include "langhooks.h" +#include "optabs.h" +#include "tm-constrs.h" +#include "reload.h" /* For operands_match_p */ +#include "df.h" +#include "tree-pass.h" +#include "context.h" +#include "pass_manager.h" + +/* Which cpu we're compiling for (A5, ARC600, ARC601, ARC700). */ +static const char *arc_cpu_string = ""; + +/* ??? Loads can handle any constant, stores can only handle small ones. */ +/* OTOH, LIMMs cost extra, so their usefulness is limited. */ +#define RTX_OK_FOR_OFFSET_P(MODE, X) \ +(GET_CODE (X) == CONST_INT \ + && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \ + (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \ + ? 
0 \ + : -(-GET_MODE_SIZE (MODE) | -4) >> 1))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \ + && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \ + && GET_MODE_SIZE ((MODE)) <= 4) \ + || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) + +#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \ +(GET_CODE (X) == PLUS \ + && GET_CODE (XEXP (X, 0)) == MULT \ + && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \ + && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ + && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \ + || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \ + && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \ + || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1))))) + +#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \ + (GET_CODE (X) == PLUS \ + && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \ + && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \ + || (GET_CODE (XEXP ((X), 1)) == CONST \ + && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \ + && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT))) + +/* Array of valid operand punctuation characters. */ +char arc_punct_chars[256]; + +/* State used by arc_ccfsm_advance to implement conditional execution. */ +struct GTY (()) arc_ccfsm +{ + int state; + int cc; + rtx cond; + rtx target_insn; + int target_label; +}; + +#define arc_ccfsm_current cfun->machine->ccfsm_current + +#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \ + ((STATE)->state == 1 || (STATE)->state == 2) + +/* Indicate we're conditionalizing insns now. */ +#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \ + ((STATE)->state += 2) + +#define ARC_CCFSM_COND_EXEC_P(STATE) \ + ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \ + || current_insn_predicate) + +/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */ +#define CCFSM_ISCOMPACT(INSN,STATE) \ + (ARC_CCFSM_COND_EXEC_P (STATE) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* Likewise, but also consider that INSN might be in a delay slot of JUMP. */ +#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \ + ((ARC_CCFSM_COND_EXEC_P (STATE) \ + || (JUMP_P (JUMP) \ + && INSN_ANNULLED_BRANCH_P (JUMP) \ + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* The maximum number of insns skipped which will be conditionalised if + possible. */ +/* When optimizing for speed: + Let p be the probability that the potentially skipped insns need to + be executed, pn the cost of a correctly predicted non-taken branch, + mt the cost of a mis/non-predicted taken branch, + mn mispredicted non-taken, pt correctly predicted taken ; + costs expressed in numbers of instructions like the ones considered + skipping. + Unfortunately we don't have a measure of predictability - this + is linked to probability only in that in the no-eviction-scenario + there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger + value that can be assumed *if* the distribution is perfectly random. 
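+ (For instance, at p = 0.5 that lower bound is 1 - 2 * 0.5 = 0, i.e. no + predictability is guaranteed at all.)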
+ A predictability of 1 is perfectly plausible not matter what p is, + because the decision could be dependent on an invocation parameter + of the program. + For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn + For small p, we want MAX_INSNS_SKIPPED == pt + + When optimizing for size: + We want to skip insn unless we could use 16 opcodes for the + non-conditionalized insn to balance the branch length or more. + Performance can be tie-breaker. */ +/* If the potentially-skipped insns are likely to be executed, we'll + generally save one non-taken branch + o + this to be no less than the 1/p */ +#define MAX_INSNS_SKIPPED 3 + +/* The values of unspec's first field. */ +enum { + ARC_UNSPEC_PLT = 3, + ARC_UNSPEC_GOT, + ARC_UNSPEC_GOTOFF +} ; + + +enum arc_builtins { + ARC_BUILTIN_NOP = 2, + ARC_BUILTIN_NORM = 3, + ARC_BUILTIN_NORMW = 4, + ARC_BUILTIN_SWAP = 5, + ARC_BUILTIN_BRK = 6, + ARC_BUILTIN_DIVAW = 7, + ARC_BUILTIN_EX = 8, + ARC_BUILTIN_MUL64 = 9, + ARC_BUILTIN_MULU64 = 10, + ARC_BUILTIN_RTIE = 11, + ARC_BUILTIN_SYNC = 12, + ARC_BUILTIN_CORE_READ = 13, + ARC_BUILTIN_CORE_WRITE = 14, + ARC_BUILTIN_FLAG = 15, + ARC_BUILTIN_LR = 16, + ARC_BUILTIN_SR = 17, + ARC_BUILTIN_SLEEP = 18, + ARC_BUILTIN_SWI = 19, + ARC_BUILTIN_TRAP_S = 20, + ARC_BUILTIN_UNIMP_S = 21, + ARC_BUILTIN_ALIGNED = 22, + + /* Sentinel to mark start of simd builtins. */ + ARC_SIMD_BUILTIN_BEGIN = 1000, + + ARC_SIMD_BUILTIN_VADDAW = 1001, + ARC_SIMD_BUILTIN_VADDW = 1002, + ARC_SIMD_BUILTIN_VAVB = 1003, + ARC_SIMD_BUILTIN_VAVRB = 1004, + ARC_SIMD_BUILTIN_VDIFAW = 1005, + ARC_SIMD_BUILTIN_VDIFW = 1006, + ARC_SIMD_BUILTIN_VMAXAW = 1007, + ARC_SIMD_BUILTIN_VMAXW = 1008, + ARC_SIMD_BUILTIN_VMINAW = 1009, + ARC_SIMD_BUILTIN_VMINW = 1010, + ARC_SIMD_BUILTIN_VMULAW = 1011, + ARC_SIMD_BUILTIN_VMULFAW = 1012, + ARC_SIMD_BUILTIN_VMULFW = 1013, + ARC_SIMD_BUILTIN_VMULW = 1014, + ARC_SIMD_BUILTIN_VSUBAW = 1015, + ARC_SIMD_BUILTIN_VSUBW = 1016, + ARC_SIMD_BUILTIN_VSUMMW = 1017, + ARC_SIMD_BUILTIN_VAND = 1018, + ARC_SIMD_BUILTIN_VANDAW = 1019, + ARC_SIMD_BUILTIN_VBIC = 1020, + ARC_SIMD_BUILTIN_VBICAW = 1021, + ARC_SIMD_BUILTIN_VOR = 1022, + ARC_SIMD_BUILTIN_VXOR = 1023, + ARC_SIMD_BUILTIN_VXORAW = 1024, + ARC_SIMD_BUILTIN_VEQW = 1025, + ARC_SIMD_BUILTIN_VLEW = 1026, + ARC_SIMD_BUILTIN_VLTW = 1027, + ARC_SIMD_BUILTIN_VNEW = 1028, + ARC_SIMD_BUILTIN_VMR1AW = 1029, + ARC_SIMD_BUILTIN_VMR1W = 1030, + ARC_SIMD_BUILTIN_VMR2AW = 1031, + ARC_SIMD_BUILTIN_VMR2W = 1032, + ARC_SIMD_BUILTIN_VMR3AW = 1033, + ARC_SIMD_BUILTIN_VMR3W = 1034, + ARC_SIMD_BUILTIN_VMR4AW = 1035, + ARC_SIMD_BUILTIN_VMR4W = 1036, + ARC_SIMD_BUILTIN_VMR5AW = 1037, + ARC_SIMD_BUILTIN_VMR5W = 1038, + ARC_SIMD_BUILTIN_VMR6AW = 1039, + ARC_SIMD_BUILTIN_VMR6W = 1040, + ARC_SIMD_BUILTIN_VMR7AW = 1041, + ARC_SIMD_BUILTIN_VMR7W = 1042, + ARC_SIMD_BUILTIN_VMRB = 1043, + ARC_SIMD_BUILTIN_VH264F = 1044, + ARC_SIMD_BUILTIN_VH264FT = 1045, + ARC_SIMD_BUILTIN_VH264FW = 1046, + ARC_SIMD_BUILTIN_VVC1F = 1047, + ARC_SIMD_BUILTIN_VVC1FT = 1048, + + /* Va, Vb, rlimm instructions. */ + ARC_SIMD_BUILTIN_VBADDW = 1050, + ARC_SIMD_BUILTIN_VBMAXW = 1051, + ARC_SIMD_BUILTIN_VBMINW = 1052, + ARC_SIMD_BUILTIN_VBMULAW = 1053, + ARC_SIMD_BUILTIN_VBMULFW = 1054, + ARC_SIMD_BUILTIN_VBMULW = 1055, + ARC_SIMD_BUILTIN_VBRSUBW = 1056, + ARC_SIMD_BUILTIN_VBSUBW = 1057, + + /* Va, Vb, Ic instructions. */ + ARC_SIMD_BUILTIN_VASRW = 1060, + ARC_SIMD_BUILTIN_VSR8 = 1061, + ARC_SIMD_BUILTIN_VSR8AW = 1062, + + /* Va, Vb, u6 instructions. 
*/ + ARC_SIMD_BUILTIN_VASRRWi = 1065, + ARC_SIMD_BUILTIN_VASRSRWi = 1066, + ARC_SIMD_BUILTIN_VASRWi = 1067, + ARC_SIMD_BUILTIN_VASRPWBi = 1068, + ARC_SIMD_BUILTIN_VASRRPWBi = 1069, + ARC_SIMD_BUILTIN_VSR8AWi = 1070, + ARC_SIMD_BUILTIN_VSR8i = 1071, + + /* Va, Vb, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMVAW = 1075, + ARC_SIMD_BUILTIN_VMVW = 1076, + ARC_SIMD_BUILTIN_VMVZW = 1077, + ARC_SIMD_BUILTIN_VD6TAPF = 1078, + + /* Va, rlimm, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMOVAW = 1080, + ARC_SIMD_BUILTIN_VMOVW = 1081, + ARC_SIMD_BUILTIN_VMOVZW = 1082, + + /* Va, Vb instructions. */ + ARC_SIMD_BUILTIN_VABSAW = 1085, + ARC_SIMD_BUILTIN_VABSW = 1086, + ARC_SIMD_BUILTIN_VADDSUW = 1087, + ARC_SIMD_BUILTIN_VSIGNW = 1088, + ARC_SIMD_BUILTIN_VEXCH1 = 1089, + ARC_SIMD_BUILTIN_VEXCH2 = 1090, + ARC_SIMD_BUILTIN_VEXCH4 = 1091, + ARC_SIMD_BUILTIN_VUPBAW = 1092, + ARC_SIMD_BUILTIN_VUPBW = 1093, + ARC_SIMD_BUILTIN_VUPSBAW = 1094, + ARC_SIMD_BUILTIN_VUPSBW = 1095, + + ARC_SIMD_BUILTIN_VDIRUN = 1100, + ARC_SIMD_BUILTIN_VDORUN = 1101, + ARC_SIMD_BUILTIN_VDIWR = 1102, + ARC_SIMD_BUILTIN_VDOWR = 1103, + + ARC_SIMD_BUILTIN_VREC = 1105, + ARC_SIMD_BUILTIN_VRUN = 1106, + ARC_SIMD_BUILTIN_VRECRUN = 1107, + ARC_SIMD_BUILTIN_VENDREC = 1108, + + ARC_SIMD_BUILTIN_VLD32WH = 1110, + ARC_SIMD_BUILTIN_VLD32WL = 1111, + ARC_SIMD_BUILTIN_VLD64 = 1112, + ARC_SIMD_BUILTIN_VLD32 = 1113, + ARC_SIMD_BUILTIN_VLD64W = 1114, + ARC_SIMD_BUILTIN_VLD128 = 1115, + ARC_SIMD_BUILTIN_VST128 = 1116, + ARC_SIMD_BUILTIN_VST64 = 1117, + + ARC_SIMD_BUILTIN_VST16_N = 1120, + ARC_SIMD_BUILTIN_VST32_N = 1121, + + ARC_SIMD_BUILTIN_VINTI = 1201, + + ARC_SIMD_BUILTIN_END +}; + +/* A nop is needed between a 4 byte insn that sets the condition codes and + a branch that uses them (the same isn't true for an 8 byte insn that sets + the condition codes). Set by arc_ccfsm_advance. Used by + arc_print_operand. */ + +static int get_arc_condition_code (rtx); + +static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *); + +/* Initialized arc_attribute_table to NULL since arc doesnot have any + machine specific supported attributes. */ +const struct attribute_spec arc_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true }, + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 21/25 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 25 bit + addressing range of unconditionalized bl. */ + { "medium_call", 0, 0, false, true, true, NULL, false }, + /* And these functions are always known to reside within the 21 bit + addressing range of blcc. 
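+ For illustration, these are ordinary GCC type attributes, e.g. + extern void f (void) __attribute__ ((long_call)); + forces the indirect call sequence described above.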
*/ + { "short_call", 0, 0, false, true, true, NULL, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; +static int arc_comp_type_attributes (const_tree, const_tree); +static void arc_file_start (void); +static void arc_internal_label (FILE *, const char *, unsigned long); +static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static int arc_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static void arc_encode_section_info (tree decl, rtx rtl, int first); + +static void arc_init_builtins (void); +static rtx arc_expand_builtin (tree, rtx, rtx, enum machine_mode, int); + +static int branch_dest (rtx); + +static void arc_output_pic_addr_const (FILE *, rtx, int); +void emit_pic_move (rtx *, enum machine_mode); +bool arc_legitimate_pic_operand_p (rtx); +static bool arc_function_ok_for_sibcall (tree, tree); +static rtx arc_function_value (const_tree, const_tree, bool); +const char * output_shift (rtx *); +static void arc_reorg (void); +static bool arc_in_small_data_p (const_tree); + +static void arc_init_reg_tables (void); +static bool arc_return_in_memory (const_tree, const_tree); +static void arc_init_simd_builtins (void); +static bool arc_vector_mode_supported_p (enum machine_mode); + +static bool arc_can_use_doloop_p (double_int, double_int, unsigned int, bool); +static const char *arc_invalid_within_doloop (const_rtx); + +static void output_short_suffix (FILE *file); + +static bool arc_frame_pointer_required (void); + +/* Implements target hook vector_mode_supported_p. */ + +static bool +arc_vector_mode_supported_p (enum machine_mode mode) +{ + if (!TARGET_SIMD_SET) + return false; + + if ((mode == V4SImode) + || (mode == V8HImode)) + return true; + + return false; +} + + +/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */ +static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; +static rtx arc_delegitimize_address (rtx); +static bool arc_can_follow_jump (const_rtx follower, const_rtx followee); + +static rtx frame_insn (rtx); +static void arc_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx arc_legitimize_address_0 (rtx, rtx, enum machine_mode mode); + +static void arc_finalize_pic (void); + +/* initialize the GCC target structure. 
*/ +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arc_file_start +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arc_attribute_table +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arc_internal_label +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arc_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arc_address_cost + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arc_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arc_expand_builtin + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arc_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arc_function_value + +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop + +#undef TARGET_PRESERVE_RELOAD_P +#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p + +#undef TARGET_CAN_FOLLOW_JUMP +#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address + +/* Usually, we will be able to scale anchor offsets. + When this fails, we want LEGITIMIZE_ADDRESS to kick in. 
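+ The -1024 .. 1020 range below presumably corresponds to the signed + 9-bit load/store offset scaled by 4, i.e. -256*4 .. 255*4.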
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET (-1024) +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET (1020) + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD arc_secondary_reload + +#define TARGET_OPTION_OVERRIDE arc_override_options + +#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage + +#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline + +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address + +#define TARGET_CAN_ELIMINATE arc_can_eliminate + +#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required + +#define TARGET_FUNCTION_ARG arc_function_arg + +#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance + +#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p + +#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p + +#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p + +#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address + +#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length + +#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters + +#define TARGET_LRA_P arc_lra_p +#define TARGET_REGISTER_PRIORITY arc_register_priority +/* Stores with scaled offsets have different displacement ranges. */ +#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true +#define TARGET_SPILL_CLASS arc_spill_class + +#include "target-def.h" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +/* Try to keep the (mov:DF _, reg) as early as possible so + that the dh-lr insns appear together and can + use the peephole2 pattern. */ + +static int +arc_sched_adjust_priority (rtx insn, int priority) +{ + rtx set = single_set (insn); + if (set + && GET_MODE (SET_SRC(set)) == DFmode + && GET_CODE (SET_SRC(set)) == REG) + { + /* Incrementing priority by 20 (empirically derived). */ + return priority + 20; + } + + return priority; +} + +static reg_class_t +arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, enum machine_mode, + secondary_reload_info *) +{ + if (cl == DOUBLE_REGS) + return GENERAL_REGS; + + /* The loop counter register can be stored, but not loaded directly. 
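+ Returning GENERAL_REGS below makes reload use a general register as + an intermediate when such a register has to be loaded from memory.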
*/ + if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS) + && in_p && MEM_P (x)) + return GENERAL_REGS; + return NO_REGS; +} + +static unsigned arc_ifcvt (void); + +namespace { + +const pass_data pass_data_arc_ifcvt = +{ + RTL_PASS, + "arc_ifcvt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_ifcvt : public rtl_opt_pass +{ +public: + pass_arc_ifcvt(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_ifcvt, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); } + unsigned int execute () { return arc_ifcvt (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_ifcvt (gcc::context *ctxt) +{ + return new pass_arc_ifcvt (ctxt); +} + +static unsigned arc_predicate_delay_insns (void); + +namespace { + +const pass_data pass_data_arc_predicate_delay_insns = +{ + RTL_PASS, + "arc_predicate_delay_insns", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_predicate_delay_insns : public rtl_opt_pass +{ +public: + pass_arc_predicate_delay_insns(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return arc_predicate_delay_insns (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_predicate_delay_insns (gcc::context *ctxt) +{ + return new pass_arc_predicate_delay_insns (ctxt); +} + +/* Called by OVERRIDE_OPTIONS to initialize various things. */ + +void +arc_init (void) +{ + enum attr_tune tune_dflt = TUNE_NONE; + + if (TARGET_A5) + { + arc_cpu_string = "A5"; + } + else if (TARGET_ARC600) + { + arc_cpu_string = "ARC600"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC601) + { + arc_cpu_string = "ARC601"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC700) + { + arc_cpu_string = "ARC700"; + tune_dflt = TUNE_ARC700_4_2_STD; + } + else + gcc_unreachable (); + if (arc_tune == TUNE_NONE) + arc_tune = tune_dflt; + /* Note: arc_multcost is only used in rtx_cost if speed is true. */ + if (arc_multcost < 0) + switch (arc_tune) + { + case TUNE_ARC700_4_2_STD: + /* latency 7; + max throughput (1 multiply + 4 other insns) / 5 cycles. */ + arc_multcost = COSTS_N_INSNS (4); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC700_4_2_XMAC: + /* latency 5; + max throughput (1 multiply + 2 other insns) / 3 cycles. */ + arc_multcost = COSTS_N_INSNS (3); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC600: + if (TARGET_MUL64_SET) + { + arc_multcost = COSTS_N_INSNS (4); + break; + } + /* Fall through. */ + default: + arc_multcost = COSTS_N_INSNS (30); + break; + } + + /* Support mul64 generation only for A5 and ARC600. */ + if (TARGET_MUL64_SET && TARGET_ARC700) + error ("-mmul64 not supported for ARC700"); + + /* MPY instructions valid only for ARC700. */ + if (TARGET_NOMPY_SET && !TARGET_ARC700) + error ("-mno-mpy supported only for ARC700"); + + /* mul/mac instructions only for ARC600. 
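+ (ARC601 is accepted as well, as the check below shows.)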
*/ + if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601)) + error ("-mmul32x16 supported only for ARC600 or ARC601"); + + if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR) + error ("-mno-dpfp-lrsr suppforted only with -mdpfp"); + + /* FPX-1. No fast and compact together. */ + if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET) + || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET)) + error ("FPX fast and compact options cannot be specified together"); + + /* FPX-2. No fast-spfp for arc600 or arc601. */ + if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601)) + error ("-mspfp_fast not available on ARC600 or ARC601"); + + /* FPX-3. No FPX extensions on pre-ARC600 cores. */ + if ((TARGET_DPFP || TARGET_SPFP) + && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700)) + error ("FPX extensions not available on pre-ARC600 cores"); + + /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */ + if (flag_pic && !TARGET_ARC700) + { + warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only..", arc_cpu_string); + flag_pic = 0; + } + + arc_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (arc_punct_chars, 0, sizeof (arc_punct_chars)); + arc_punct_chars['#'] = 1; + arc_punct_chars['*'] = 1; + arc_punct_chars['?'] = 1; + arc_punct_chars['!'] = 1; + arc_punct_chars['^'] = 1; + arc_punct_chars['&'] = 1; + + if (optimize > 1 && !TARGET_NO_COND_EXEC) + { + /* There are two target-independent ifcvt passes, and arc_reorg may do + one or more arc_ifcvt calls. */ + opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g); + struct register_pass_info arc_ifcvt4_info + = { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER }; + struct register_pass_info arc_ifcvt5_info + = { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE }; + + register_pass (&arc_ifcvt4_info); + register_pass (&arc_ifcvt5_info); + } + + if (flag_delayed_branch) + { + opt_pass *pass_arc_predicate_delay_insns + = make_pass_arc_predicate_delay_insns (g); + struct register_pass_info arc_predicate_delay_info + = { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER }; + + register_pass (&arc_predicate_delay_info); + } +} + +/* Check ARC options, generate derived target attributes. */ + +static void +arc_override_options (void) +{ + if (arc_cpu == PROCESSOR_NONE) + arc_cpu = PROCESSOR_ARC700; + + if (arc_size_opt_level == 3) + optimize_size = 1; + + if (flag_pic) + target_flags |= MASK_NO_SDATA_SET; + + if (flag_no_common == 255) + flag_no_common = !TARGET_NO_SDATA_SET; + + /* TARGET_COMPACT_CASESI needs the "q" register class. */ \ + if (TARGET_MIXED_CODE) + TARGET_Q_CLASS = 1; + if (!TARGET_Q_CLASS) + TARGET_COMPACT_CASESI = 0; + if (TARGET_COMPACT_CASESI) + TARGET_CASE_VECTOR_PC_RELATIVE = 1; + + /* These need to be done at start up. It's convenient to do them here. */ + arc_init (); +} + +/* The condition codes of the ARC, and the inverse function. */ +/* For short branches, the "c" / "nc" names are not defined in the ARC + Programmers manual, so we have to use "lo" / "hs"" instead. 
*/ +static const char *arc_condition_codes[] = +{ + "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv", + "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 +}; + +enum arc_cc_code_index +{ + ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N, + ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV, + ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ, + ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC +}; + +#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the ARC condition code string in + `arc_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_arc_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case CCmode: + case SImode: /* For BRcc. */ + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GT : return ARC_CC_GT; + case LE : return ARC_CC_LE; + case GE : return ARC_CC_GE; + case LT : return ARC_CC_LT; + case GTU : return ARC_CC_HI; + case LEU : return ARC_CC_LS; + case LTU : return ARC_CC_LO; + case GEU : return ARC_CC_HS; + default : gcc_unreachable (); + } + case CC_ZNmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GE: return ARC_CC_P; + case LT: return ARC_CC_N; + case GT : return ARC_CC_PNZ; + default : gcc_unreachable (); + } + case CC_Zmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_Cmode: + switch (GET_CODE (comparison)) + { + case LTU : return ARC_CC_C; + case GEU : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FP_GTmode: + if (TARGET_ARGONAUT_SET && TARGET_SPFP) + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_N; + case UNLE: return ARC_CC_P; + default : gcc_unreachable (); + } + else + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_HI; + case UNLE : return ARC_CC_LS; + default : gcc_unreachable (); + } + case CC_FP_GEmode: + /* Same for FPX and non-FPX. */ + switch (GET_CODE (comparison)) + { + case GE : return ARC_CC_HS; + case UNLT : return ARC_CC_LO; + default : gcc_unreachable (); + } + case CC_FP_UNEQmode: + switch (GET_CODE (comparison)) + { + case UNEQ : return ARC_CC_EQ; + case LTGT : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_FP_ORDmode: + switch (GET_CODE (comparison)) + { + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FPXmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + case LTGT : return ARC_CC_HI; + case UNEQ : return ARC_CC_LS; + default : gcc_unreachable (); + } + default : gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + +/* Return true if COMPARISON has a short form that can accomodate OFFSET. */ + +bool +arc_short_comparison_p (rtx comparison, int offset) +{ + gcc_assert (ARC_CC_NC == ARC_CC_HS); + gcc_assert (ARC_CC_C == ARC_CC_LO); + switch (get_arc_condition_code (comparison)) + { + case ARC_CC_EQ: case ARC_CC_NE: + return offset >= -512 && offset <= 506; + case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT: + case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS: + return offset >= -64 && offset <= 58; + default: + return false; + } +} + +/* Given a comparison code (EQ, NE, etc.) 
and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +enum machine_mode +arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + enum machine_mode mode = GET_MODE (x); + rtx x1; + + /* For an operation that sets the condition codes as a side-effect, the + C and V flags is not set as for cmp, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? We could use "pnz" for greater than zero, however, we could then + get into trouble because the comparison could not be reversed. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && (op == EQ || op == NE + || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x) <= 4)))) + return CC_ZNmode; + + /* add.f for if (a+b) */ + if (mode == SImode + && GET_CODE (y) == NEG + && (op == EQ || op == NE)) + return CC_ZNmode; + + /* Check if this is a test suitable for bxor.f . */ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && ((INTVAL (y) - 1) & INTVAL (y)) == 0 + && INTVAL (y)) + return CC_Zmode; + + /* Check if this is a test suitable for add / bmsk.f . */ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1))) + && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0 + && (~INTVAL (x1) | INTVAL (y)) < 0 + && (~INTVAL (x1) | INTVAL (y)) > -0x800) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (TARGET_ARGONAUT_SET + && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP))) + switch (op) + { + case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED: + return CC_FPXmode; + case LT: case UNGE: case GT: case UNLE: + return CC_FP_GTmode; + case LE: case UNGT: case GE: case UNLT: + return CC_FP_GEmode; + default: gcc_unreachable (); + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE) + switch (op) + { + case EQ: case NE: return CC_Zmode; + case LT: case UNGE: + case GT: case UNLE: return CC_FP_GTmode; + case LE: case UNGT: + case GE: case UNLT: return CC_FP_GEmode; + case UNEQ: case LTGT: return CC_FP_UNEQmode; + case ORDERED: case UNORDERED: return CC_FP_ORDmode; + default: gcc_unreachable (); + } + + return CCmode; +} + +/* Vectors to keep interesting information about registers where it can easily + be got. We use to use the actual mode value as the bit number, but there + is (or may be) more than 32 modes now. Instead we use two tables: one + indexed by hard register number, and one indexed by mode. */ + +/* The purpose of arc_mode_class is to shrink the range of modes so that + they all fit (as bit numbers) in a 32-bit word (again). Each real mode is + mapped into one arc_mode_class mode. */ + +enum arc_mode_class { + C_MODE, + S_MODE, D_MODE, T_MODE, O_MODE, + SF_MODE, DF_MODE, TF_MODE, OF_MODE, + V_MODE +}; + +/* Modes for condition codes. */ +#define C_MODES (1 << (int) C_MODE) + +/* Modes for single-word and smaller quantities. */ +#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) + +/* Modes for double-word and smaller quantities. */ +#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) + +/* Mode for 8-byte DF values only. */ +#define DF_MODES (1 << DF_MODE) + +/* Modes for quad-word and smaller quantities. */ +#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) + +/* Modes for 128-bit vectors. 
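+ (i.e. the V4SImode / V8HImode SIMD vectors supported above.)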
*/ +#define V_MODES (1 << (int) V_MODE) + +/* Value is 1 if register/mode pair is acceptable on arc. */ + +unsigned int arc_hard_regno_mode_ok[] = { + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES, + D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + + /* ??? Leave these as S_MODES for now. */ + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES +}; + +unsigned int arc_mode_class [NUM_MACHINE_MODES]; + +enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +enum reg_class +arc_preferred_reload_class (rtx, enum reg_class cl) +{ + if ((cl) == CHEAP_CORE_REGS || (cl) == WRITABLE_CORE_REGS) + return GENERAL_REGS; + return cl; +} + +/* Initialize the arc_mode_class array. */ + +static void +arc_init_reg_tables (void) +{ + int i; + + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + switch (GET_MODE_CLASS (i)) + { + case MODE_INT: + case MODE_PARTIAL_INT: + case MODE_COMPLEX_INT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) S_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) D_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) T_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) O_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) DF_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) TF_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) OF_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_VECTOR_INT: + arc_mode_class [i] = (1<< (int) V_MODE); + break; + case MODE_CC: + default: + /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so + we must explicitly check for them here. */ + if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode + || i == (int) CC_Cmode + || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode) + arc_mode_class[i] = 1 << (int) C_MODE; + else + arc_mode_class[i] = 0; + break; + } + } +} + +/* Core registers 56..59 are used for multiply extension options. + The dsp option uses r56 and r57, these are then named acc1 and acc2. + acc1 is the highpart, and acc2 the lowpart, so which register gets which + number depends on endianness. 
+ The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi. + Because mlo / mhi form a 64 bit value, we use different gcc internal + register numbers to make them form a register pair as the gcc internals + know it. mmid gets number 57, if still available, and mlo / mhi get + number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER + to map this back. */ + char rname56[5] = "r56"; + char rname57[5] = "r57"; + char rname58[5] = "r58"; + char rname59[5] = "r59"; + +static void +arc_conditional_register_usage (void) +{ + int regno; + int i; + int fix_start = 60, fix_end = 55; + + if (TARGET_MUL64_SET) + { + fix_start = 57; + fix_end = 59; + + /* We don't provide a name for mmed. In rtl / assembly resource lists, + you are supposed to refer to it as mlo & mhi, e.g + (zero_extract:SI (reg:DI 58) (const_int 32) (16)) . + In an actual asm instruction, you are of course use mmed. + The point of avoiding having a separate register for mmed is that + this way, we don't have to carry clobbers of that reg around in every + isntruction that modifies mlo and/or mhi. */ + strcpy (rname57, ""); + strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo"); + strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi"); + } + if (TARGET_MULMAC_32BY16_SET) + { + fix_start = 56; + fix_end = fix_end > 57 ? fix_end : 57; + strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2"); + strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1"); + } + for (regno = fix_start; regno <= fix_end; regno++) + { + if (!fixed_regs[regno]) + warning (0, "multiply option implies r%d is fixed", regno); + fixed_regs [regno] = call_used_regs[regno] = 1; + } + if (TARGET_Q_CLASS) + { + reg_alloc_order[2] = 12; + reg_alloc_order[3] = 13; + reg_alloc_order[4] = 14; + reg_alloc_order[5] = 15; + reg_alloc_order[6] = 1; + reg_alloc_order[7] = 0; + reg_alloc_order[8] = 4; + reg_alloc_order[9] = 5; + reg_alloc_order[10] = 6; + reg_alloc_order[11] = 7; + reg_alloc_order[12] = 8; + reg_alloc_order[13] = 9; + reg_alloc_order[14] = 10; + reg_alloc_order[15] = 11; + } + if (TARGET_SIMD_SET) + { + int i; + for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) + reg_alloc_order [i] = i; + for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; + i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) + reg_alloc_order [i] = i; + } + /* For Arctangent-A5 / ARC600, lp_count may not be read in an instruction + following immediately after another one setting it to a new value. + There was some discussion on how to enforce scheduling constraints for + processors with missing interlocks on the gcc mailing list: + http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html . + However, we can't actually use this approach, because for ARC the + delay slot scheduling pass is active, which runs after + machine_dependent_reorg. */ + if (TARGET_ARC600) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); + else if (!TARGET_ARC700) + fixed_regs[LP_COUNT] = 1; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (!call_used_regs[regno]) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); + for (regno = 32; regno < 60; regno++) + if (!fixed_regs[regno]) + SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno); + if (TARGET_ARC700) + { + for (regno = 32; regno <= 60; regno++) + CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno); + + /* If they have used -ffixed-lp_count, make sure it takes + effect. 
*/ + if (fixed_regs[LP_COUNT]) + { + CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT); + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); + CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT); + + /* Instead of taking out SF_MODE like below, forbid it outright. */ + arc_hard_regno_mode_ok[60] = 0; + } + else + arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE; + } + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i < 29) + { + if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15)))) + arc_regno_reg_class[i] = ARCOMPACT16_REGS; + else + arc_regno_reg_class[i] = GENERAL_REGS; + } + else if (i < 60) + arc_regno_reg_class[i] + = (fixed_regs[i] + ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i) + ? CHEAP_CORE_REGS : ALL_CORE_REGS) + : ((TARGET_ARC700 + && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)) + ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS)); + else + arc_regno_reg_class[i] = NO_REGS; + } + + /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated. */ + if (!TARGET_Q_CLASS) + { + CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]); + CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]); + } + + gcc_assert (FIRST_PSEUDO_REGISTER >= 144); + + /* Handle Special Registers. */ + arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */ + arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */ + arc_regno_reg_class[31] = LINK_REGS; /* blink register. */ + arc_regno_reg_class[60] = LPCOUNT_REG; + arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */ + arc_regno_reg_class[62] = GENERAL_REGS; + + if (TARGET_DPFP) + { + for (i = 40; i < 44; ++i) + { + arc_regno_reg_class[i] = DOUBLE_REGS; + + /* Unless they want us to do 'mov d1, 0x00000000' make sure + no attempt is made to use such a register as a destination + operand in *movdf_insn. */ + if (!TARGET_ARGONAUT_SET) + { + /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is + interpreted to mean they can use D1 or D2 in their insn. */ + CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i); + } + } + } + else + { + /* Disable all DOUBLE_REGISTER settings, + if not generating DPFP code. */ + arc_regno_reg_class[40] = ALL_REGS; + arc_regno_reg_class[41] = ALL_REGS; + arc_regno_reg_class[42] = ALL_REGS; + arc_regno_reg_class[43] = ALL_REGS; + + arc_hard_regno_mode_ok[40] = 0; + arc_hard_regno_mode_ok[42] = 0; + + CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]); + } + + if (TARGET_SIMD_SET) + { + gcc_assert (ARC_FIRST_SIMD_VR_REG == 64); + gcc_assert (ARC_LAST_SIMD_VR_REG == 127); + + for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) + arc_regno_reg_class [i] = SIMD_VR_REGS; + + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136); + gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143); + + for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; + i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) + arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS; + } + + /* pc : r63 */ + arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS; +} + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. 
*/ + +static tree +arc_handle_interrupt_attribute (tree *, tree name, tree args, int, + bool *no_add_attrs) +{ + gcc_assert (args); + + tree value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "ilink1") + && strcmp (TREE_STRING_POINTER (value), "ilink2")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"ilink1\" or \"ilink2\"", + name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +/* Return zero if TYPE1 and TYPE are incompatible, one if they are compatible, + and two if they are nearly compatible (which causes a warning to be + generated). */ + +static int +arc_comp_type_attributes (const_tree type1, + const_tree type2) +{ + int l1, l2, m1, m2, s1, s2; + + /* Check for mismatch of non-default calling convention. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched call attributes. */ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL; + m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | m1 | m2 | s1 | s2) + { + /* If one type has an attribute, the other must have the same attribute. */ + if ((l1 != l2) || (m1 != m2) || (s1 != s2)) + return 0; + + /* Disallow mixed attributes. */ + if (l1 + m1 + s1 > 1) + return 0; + } + + + return 1; +} + +/* Set the default attributes for TYPE. */ + +void +arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED) +{ + gcc_unreachable(); +} + +/* Misc. utilities. */ + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for the cc reg in the proper mode. */ + +rtx +gen_compare_reg (rtx comparison, enum machine_mode omode) +{ + enum rtx_code code = GET_CODE (comparison); + rtx x = XEXP (comparison, 0); + rtx y = XEXP (comparison, 1); + rtx tmp, cc_reg; + enum machine_mode mode, cmode; + + + cmode = GET_MODE (x); + if (cmode == VOIDmode) + cmode = GET_MODE (y); + gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode); + if (cmode == SImode) + { + if (!register_operand (x, SImode)) + { + if (register_operand (y, SImode)) + { + tmp = x; + x = y; + y = tmp; + code = swap_condition (code); + } + else + x = copy_to_mode_reg (SImode, x); + } + if (GET_CODE (y) == SYMBOL_REF && flag_pic) + y = copy_to_mode_reg (SImode, y); + } + else + { + x = force_reg (cmode, x); + y = force_reg (cmode, y); + } + mode = SELECT_CC_MODE (code, x, y); + + cc_reg = gen_rtx_REG (mode, CC_REG); + + /* ??? 
FIXME (x-y)==0, as done by both cmpsfpx_raw and + cmpdfpx_raw, is not a correct comparison for floats: + http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + */ + if (TARGET_ARGONAUT_SET + && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP))) + { + switch (code) + { + case NE: case EQ: case LT: case UNGE: case LE: case UNGT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case GT: case UNLE: case GE: case UNLT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (cmode == SFmode) + { + emit_insn (gen_cmpsfpx_raw (x, y)); + } + else /* DFmode */ + { + /* Accepts Dx regs directly by insns. */ + emit_insn (gen_cmpdfpx_raw (x, y)); + } + + if (mode != CC_FPXmode) + emit_insn (gen_rtx_SET (VOIDmode, cc_reg, + gen_rtx_COMPARE (mode, + gen_rtx_REG (CC_FPXmode, 61), + const0_rtx))); + } + else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE) + { + rtx op0 = gen_rtx_REG (cmode, 0); + rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD); + + switch (code) + { + case NE: case EQ: case GT: case UNLE: case GE: case UNLT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case LT: case UNGE: case LE: case UNGT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (currently_expanding_to_rtl) + { + emit_move_insn (op0, x); + emit_move_insn (op1, y); + } + else + { + gcc_assert (rtx_equal_p (op0, x)); + gcc_assert (rtx_equal_p (op1, y)); + } + emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1))); + } + else + emit_insn (gen_rtx_SET (omode, cc_reg, + gen_rtx_COMPARE (mode, x, y))); + return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx); +} + +/* Return true if VALUE, a const_double, will fit in a limm (4 byte number). + We assume the value can be either signed or unsigned. */ + +bool +arc_double_limm_p (rtx value) +{ + HOST_WIDE_INT low, high; + + gcc_assert (GET_CODE (value) == CONST_DOUBLE); + + if (TARGET_DPFP) + return true; + + low = CONST_DOUBLE_LOW (value); + high = CONST_DOUBLE_HIGH (value); + + if (low & 0x80000000) + { + return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0) + || (((low & - (unsigned HOST_WIDE_INT) 0x80000000) + == - (unsigned HOST_WIDE_INT) 0x80000000) + && high == -1)); + } + else + { + return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0; + } +} + +/* Do any needed setup for a variadic function. For the ARC, we must + create a register parameter block, and then copy any anonymous arguments + in registers to memory. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. */ + +static void +arc_setup_incoming_varargs (cumulative_args_t args_so_far, + enum machine_mode mode, tree type, + int *pretend_size, int no_rtl) +{ + int first_anon_arg; + CUMULATIVE_ARGS next_cum; + + /* We must treat `__builtin_va_alist' as an anonymous arg. */ + + next_cum = *get_cumulative_args (args_so_far); + arc_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1); + first_anon_arg = next_cum; + + if (first_anon_arg < MAX_ARC_PARM_REGS) + { + /* First anonymous (unnamed) argument is in a reg. */ + + /* Note that first_reg_offset < MAX_ARC_PARM_REGS. 
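+ (This is guaranteed by the enclosing first_anon_arg check.)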
*/ + int first_reg_offset = first_anon_arg; + + if (!no_rtl) + { + rtx regblock + = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx, + FIRST_PARM_OFFSET (0))); + move_block_from_reg (first_reg_offset, regblock, + MAX_ARC_PARM_REGS - first_reg_offset); + } + + *pretend_size + = ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD); + } +} + +/* Cost functions. */ + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +int +arc_address_cost (rtx addr, enum machine_mode, addr_space_t, bool speed) +{ + switch (GET_CODE (addr)) + { + case REG : + return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: + case PRE_MODIFY: case POST_MODIFY: + return !speed; + + case LABEL_REF : + case SYMBOL_REF : + case CONST : + /* Most likely needs a LIMM. */ + return COSTS_N_INSNS (1); + + case PLUS : + { + register rtx plus0 = XEXP (addr, 0); + register rtx plus1 = XEXP (addr, 1); + + if (GET_CODE (plus0) != REG + && (GET_CODE (plus0) != MULT + || !CONST_INT_P (XEXP (plus0, 1)) + || (INTVAL (XEXP (plus0, 1)) != 2 + && INTVAL (XEXP (plus0, 1)) != 4))) + break; + + switch (GET_CODE (plus1)) + { + case CONST_INT : + return (!RTX_OK_FOR_OFFSET_P (SImode, plus1) + ? COSTS_N_INSNS (1) + : speed + ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_O (plus1)) + ? 0 + : 1); + case REG: + return (speed < 1 ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_Rcq (plus1)) + ? 0 : 1); + case CONST : + case SYMBOL_REF : + case LABEL_REF : + return COSTS_N_INSNS (1); + default: + break; + } + break; + } + default: + break; + } + + return 4; +} + +/* Emit instruction X with the frame related bit set. */ + +static rtx +frame_insn (rtx x) +{ + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + return x; +} + +/* Emit a frame insn to move SRC to DST. */ + +static rtx +frame_move (rtx dst, rtx src) +{ + return frame_insn (gen_rtx_SET (VOIDmode, dst, src)); +} + +/* Like frame_move, but add a REG_INC note for REG if ADDR contains an + auto increment address, or is zero. */ + +static rtx +frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr) +{ + rtx insn = frame_move (dst, src); + + if (!addr + || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC + || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) + add_reg_note (insn, REG_INC, reg); + return insn; +} + +/* Emit a frame insn which adjusts a frame address register REG by OFFSET. */ + +static rtx +frame_add (rtx reg, HOST_WIDE_INT offset) +{ + gcc_assert ((offset & 0x3) == 0); + if (!offset) + return NULL_RTX; + return frame_move (reg, plus_constant (Pmode, reg, offset)); +} + +/* Emit a frame insn which adjusts stack pointer by OFFSET. */ + +static rtx +frame_stack_add (HOST_WIDE_INT offset) +{ + return frame_add (stack_pointer_rtx, offset); +} + +/* Traditionally, we push saved registers first in the prologue, + then we allocate the rest of the frame - and reverse in the epilogue. + This has still its merits for ease of debugging, or saving code size + or even execution time if the stack frame is so large that some accesses + can't be encoded anymore with offsets in the instruction code when using + a different scheme. + Also, it would be a good starting point if we got instructions to help + with register save/restore. + + However, often stack frames are small, and the pushing / popping has + some costs: + - the stack modification prevents a lot of scheduling. 
+ - frame allocation / deallocation needs extra instructions. + - unless we know that we compile ARC700 user code, we need to put + a memory barrier after frame allocation / before deallocation to + prevent interrupts clobbering our data in the frame. + In particular, we don't have any such guarantees for library functions, + which tend to, on the other hand, to have small frames. + + Thus, for small frames, we'd like to use a different scheme: + - The frame is allocated in full with the first prologue instruction, + and deallocated in full with the last epilogue instruction. + Thus, the instructions in-betwen can be freely scheduled. + - If the function has no outgoing arguments on the stack, we can allocate + one register save slot at the top of the stack. This register can then + be saved simultanously with frame allocation, and restored with + frame deallocation. + This register can be picked depending on scheduling considerations, + although same though should go into having some set of registers + to be potentially lingering after a call, and others to be available + immediately - i.e. in the absence of interprocedual optimization, we + can use an ABI-like convention for register allocation to reduce + stalls after function return. */ +/* Function prologue/epilogue handlers. */ + +/* ARCompact stack frames look like: + + Before call After call + high +-----------------------+ +-----------------------+ + mem | reg parm save area | | reg parm save area | + | only created for | | only created for | + | variable arg fns | | variable arg fns | + AP +-----------------------+ +-----------------------+ + | return addr register | | return addr register | + | (if required) | | (if required) | + +-----------------------+ +-----------------------+ + | | | | + | reg save area | | reg save area | + | | | | + +-----------------------+ +-----------------------+ + | frame pointer | | frame pointer | + | (if required) | | (if required) | + FP +-----------------------+ +-----------------------+ + | | | | + | local/temp variables | | local/temp variables | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | arguments on stack | | arguments on stack | + | | | | + SP +-----------------------+ +-----------------------+ + | reg parm save area | + | only created for | + | variable arg fns | + AP +-----------------------+ + | return addr register | + | (if required) | + +-----------------------+ + | | + | reg save area | + | | + +-----------------------+ + | frame pointer | + | (if required) | + FP +-----------------------+ + | | + | local/temp variables | + | | + +-----------------------+ + | | + | arguments on stack | + low | | + mem SP +-----------------------+ + +Notes: +1) The "reg parm save area" does not exist for non variable argument fns. + The "reg parm save area" can be eliminated completely if we created our + own va-arc.h, but that has tradeoffs as well (so it's not done). */ + +/* Structure to be filled in by arc_compute_frame_size with register + save masks, and offsets for the current function. */ +struct GTY (()) arc_frame_info +{ + unsigned int total_size; /* # bytes that the entire frame takes up. */ + unsigned int extra_size; /* # bytes of extra stuff. */ + unsigned int pretend_size; /* # bytes we push and pretend caller did. */ + unsigned int args_size; /* # bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # bytes needed to store regs. */ + unsigned int var_size; /* # bytes that variables take up. 
*/ + unsigned int reg_offset; /* Offset from new sp to store regs. */ + unsigned int gmask; /* Mask of saved gp registers. */ + int initialized; /* Nonzero if frame size already calculated. */ + short millicode_start_reg; + short millicode_end_reg; + bool save_return_addr; +}; + +/* Defining data structures for per-function information. */ + +typedef struct GTY (()) machine_function +{ + enum arc_function_type fn_type; + struct arc_frame_info frame_info; + /* To keep track of unalignment caused by short insns. */ + int unalign; + int force_short_suffix; /* Used when disgorging return delay slot insns. */ + const char *size_reason; + struct arc_ccfsm ccfsm_current; + /* Map from uid to ccfsm state during branch shortening. */ + rtx ccfsm_current_insn; + char arc_reorg_started; + char prescan_initialized; +} machine_function; + +/* Type of function DECL. + + The result is cached. To reset the cache at the end of a function, + call with DECL = NULL_TREE. */ + +enum arc_function_type +arc_compute_function_type (struct function *fun) +{ + tree decl = fun->decl; + tree a; + enum arc_function_type fn_type = fun->machine->fn_type; + + if (fn_type != ARC_FUNCTION_UNKNOWN) + return fn_type; + + /* Assume we have a normal function (not an interrupt handler). */ + fn_type = ARC_FUNCTION_NORMAL; + + /* Now see if this is an interrupt handler. */ + for (a = DECL_ATTRIBUTES (decl); + a; + a = TREE_CHAIN (a)) + { + tree name = TREE_PURPOSE (a), args = TREE_VALUE (a); + + if (name == get_identifier ("interrupt") + && list_length (args) == 1 + && TREE_CODE (TREE_VALUE (args)) == STRING_CST) + { + tree value = TREE_VALUE (args); + + if (!strcmp (TREE_STRING_POINTER (value), "ilink1")) + fn_type = ARC_FUNCTION_ILINK1; + else if (!strcmp (TREE_STRING_POINTER (value), "ilink2")) + fn_type = ARC_FUNCTION_ILINK2; + else + gcc_unreachable (); + break; + } + } + + return fun->machine->fn_type = fn_type; +} + +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. + Addition for pic: The gp register needs to be saved if the current + function changes it to access gotoff variables. + FIXME: This will not be needed if we used some arbitrary register + instead of r26. +*/ +#define MUST_SAVE_REGISTER(regno, interrupt_p) \ +(((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \ + && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \ + || (flag_pic && crtl->uses_pic_offset_table \ + && regno == PIC_OFFSET_TABLE_REGNUM) ) + +#define MUST_SAVE_RETURN_ADDR \ + (cfun->machine->frame_info.save_return_addr) + +/* Return non-zero if there are registers to be saved or loaded using + millicode thunks. We can only use consecutive sequences starting + with r13, and not going beyond r25. + GMASK is a bitmask of registers to save. This function sets + FRAME->millicod_start_reg .. FRAME->millicode_end_reg to the range + of registers to be saved / restored with a millicode call. */ + +static int +arc_compute_millicode_save_restore_regs (unsigned int gmask, + struct arc_frame_info *frame) +{ + int regno; + + int start_reg = 13, end_reg = 25; + + for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));) + regno++; + end_reg = regno - 1; + /* There is no point in using millicode thunks if we don't save/restore + at least three registers. 
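The loop above collects the run of consecutively saved registers starting at r13 (never past r25); whether a millicode thunk is worthwhile is decided by the threshold test that follows, three registers for a leaf function or two otherwise since blink is restored as well. A standalone sketch of the same computation; millicode_range and its return convention are illustrative only.

#include <stdio.h>

/* Sketch: given a save mask, find the run of consecutively saved
   registers starting at r13 (never past r25).  Returns the last
   register of the run, or 0 if a millicode thunk is not worthwhile.  */
static int
millicode_range (unsigned int gmask, int leaf_p)
{
  int regno = 13;
  while (regno <= 25 && (gmask & (1u << regno)))
    regno++;
  int saved = regno - 13;          /* length of the run */
  if (saved >= (leaf_p ? 3 : 2))   /* non-leaf also restores blink */
    return regno - 1;              /* millicode_end_reg */
  return 0;
}

int
main (void)
{
  /* r13-r16 saved: a run of four, so a thunk covering r13..r16.  */
  printf ("%d\n", millicode_range (0xf << 13, 1));
  return 0;
}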
For non-leaf functions we also have the + blink restore. */ + if (regno - start_reg >= 3 - (crtl->is_leaf == 0)) + { + frame->millicode_start_reg = 13; + frame->millicode_end_reg = regno - 1; + return 1; + } + return 0; +} + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +unsigned int +arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */ +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, extra_size; + unsigned int reg_size, reg_offset; + unsigned int gmask; + enum arc_function_type fn_type; + int interrupt_p; + struct arc_frame_info *frame_info = &cfun->machine->frame_info; + + size = ARC_STACK_ALIGN (size); + + /* 1) Size of locals and temporaries */ + var_size = size; + + /* 2) Size of outgoing arguments */ + args_size = crtl->outgoing_args_size; + + /* 3) Calculate space needed for saved registers. + ??? We ignore the extension registers for now. */ + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + + reg_size = 0; + gmask = 0; + fn_type = arc_compute_function_type (cfun); + interrupt_p = ARC_INTERRUPT_P (fn_type); + + for (regno = 0; regno <= 31; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + /* 4) Space for back trace data structure. + (if required) + (if required). */ + frame_info->save_return_addr + = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM)); + /* Saving blink reg in case of leaf function for millicode thunk calls. */ + if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET) + { + if (arc_compute_millicode_save_restore_regs (gmask, frame_info)) + frame_info->save_return_addr = true; + } + + extra_size = 0; + if (MUST_SAVE_RETURN_ADDR) + extra_size = 4; + if (frame_pointer_needed) + extra_size += 4; + + /* 5) Space for variable arguments passed in registers */ + pretend_size = crtl->args.pretend_args_size; + + /* Ensure everything before the locals is aligned appropriately. */ + { + unsigned int extra_plus_reg_size; + unsigned int extra_plus_reg_size_aligned; + + extra_plus_reg_size = extra_size + reg_size; + extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size); + reg_size = extra_plus_reg_size_aligned - extra_size; + } + + /* Compute total frame size. */ + total_size = var_size + args_size + extra_size + pretend_size + reg_size; + + total_size = ARC_STACK_ALIGN (total_size); + + /* Compute offset of register save area from stack pointer: + A5 Frame: pretend_size reg_size var_size args_size <--sp + */ + reg_offset = (total_size - (pretend_size + reg_size + extra_size) + + (frame_pointer_needed ? 4 : 0)); + + /* Save computed information. */ + frame_info->total_size = total_size; + frame_info->extra_size = extra_size; + frame_info->pretend_size = pretend_size; + frame_info->var_size = var_size; + frame_info->args_size = args_size; + frame_info->reg_size = reg_size; + frame_info->reg_offset = reg_offset; + frame_info->gmask = gmask; + frame_info->initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Common code to save/restore registers. */ +/* BASE_REG is the base register to use for addressing and to adjust. + GMASK is a bitmask of general purpose registers to save/restore. 
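As a worked illustration of the bookkeeping in arc_compute_frame_size, the sketch below redoes the arithmetic for a hypothetical function with 20 bytes of locals, 8 bytes of outgoing arguments, four saved registers, a saved return address and a frame pointer. The 4-byte word and the ALIGN4 macro are simplifying stand-ins for UNITS_PER_WORD and ARC_STACK_ALIGN, and the example numbers are invented.

#include <stdio.h>

#define WORD 4
#define ALIGN4(x) (((x) + 3) & ~3u)   /* stand-in for ARC_STACK_ALIGN */

int
main (void)
{
  unsigned var_size  = ALIGN4 (20);   /* locals                      */
  unsigned args_size = 8;             /* outgoing arguments          */
  unsigned reg_size  = 4 * WORD;      /* four call-saved registers   */
  unsigned pretend   = 0;             /* no variadic register block  */
  unsigned extra     = 4 + 4;         /* return address + frame ptr  */

  /* Keep everything before the locals aligned.  */
  reg_size = ALIGN4 (extra + reg_size) - extra;

  unsigned total = ALIGN4 (var_size + args_size + extra + pretend + reg_size);
  unsigned reg_offset = total - (pretend + reg_size + extra) + 4 /* fp saved */;

  printf ("total=%u reg_offset=%u\n", total, reg_offset);  /* 52 and 32 */
  return 0;
}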
+ epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk + If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably + using a pre-modify for the first memory access. *FIRST_OFFSET is then + zeroed. */ + +static void +arc_save_restore (rtx base_reg, + unsigned int gmask, int epilogue_p, int *first_offset) +{ + unsigned int offset = 0; + int regno; + struct arc_frame_info *frame = &cfun->machine->frame_info; + rtx sibthunk_insn = NULL_RTX; + + if (gmask) + { + /* Millicode thunks implementation: + Generates calls to millicodes for registers starting from r13 to r25 + Present Limitations: + - Only one range supported. The remaining regs will have the ordinary + st and ld instructions for store and loads. Hence a gmask asking + to store r13-14, r16-r25 will only generate calls to store and + load r13 to r14 while store and load insns will be generated for + r16 to r25 in the prologue and epilogue respectively. + + - Presently library only supports register ranges starting from r13. + */ + if (epilogue_p == 2 || frame->millicode_end_reg > 14) + { + int start_call = frame->millicode_start_reg; + int end_call = frame->millicode_end_reg; + int n_regs = end_call - start_call + 1; + int i = 0, r, off = 0; + rtx insn; + rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + + if (*first_offset) + { + /* "reg_size" won't be more than 127 . */ + gcc_assert (epilogue_p || abs (*first_offset <= 127)); + frame_add (base_reg, *first_offset); + *first_offset = 0; + } + insn = gen_rtx_PARALLEL + (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1)); + if (epilogue_p == 2) + i += 2; + else + XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr); + for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++) + { + rtx reg = gen_rtx_REG (SImode, r); + rtx mem + = gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off)); + + if (epilogue_p) + XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem); + else + XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg); + gmask = gmask & ~(1L << r); + } + if (epilogue_p == 2) + sibthunk_insn = insn; + else + frame_insn (insn); + offset += off; + } + + for (regno = 0; regno <= 31; regno++) + { + if ((gmask & (1L << regno)) != 0) + { + rtx reg = gen_rtx_REG (SImode, regno); + rtx addr, mem; + + if (*first_offset) + { + gcc_assert (!offset); + addr = plus_constant (Pmode, base_reg, *first_offset); + addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr); + *first_offset = 0; + } + else + { + gcc_assert (SMALL_INT (offset)); + addr = plus_constant (Pmode, base_reg, offset); + } + mem = gen_frame_mem (SImode, addr); + if (epilogue_p) + frame_move_inc (reg, mem, base_reg, addr); + else + frame_move_inc (mem, reg, base_reg, addr); + offset += UNITS_PER_WORD; + } /* if */ + } /* for */ + }/* if */ + if (sibthunk_insn) + { + rtx r12 = gen_rtx_REG (Pmode, 12); + + frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (offset))); + XVECEXP (sibthunk_insn, 0, 0) = ret_rtx; + XVECEXP (sibthunk_insn, 0, 1) + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12)); + sibthunk_insn = emit_jump_insn (sibthunk_insn); + RTX_FRAME_RELATED_P (sibthunk_insn) = 1; + } +} /* arc_save_restore */ + + +int arc_return_address_regs[4] + = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM}; + +/* Set up the stack and frame pointer (if desired) for the function. 
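arc_save_restore walks the bits of GMASK and addresses each slot at an increasing small offset, folding any pending FIRST_OFFSET into the very first access as a pre-modify (.a writeback) of the base register. The sketch below just prints the sequence it would produce for a prologue; the mnemonics are shorthand for illustration, not generated output.

#include <stdio.h>

#define UNITS_PER_WORD 4

/* Sketch: one store per set bit of GMASK, with FIRST_OFFSET folded
   into the first access as a pre-modify of the base register.  */
static void
print_save_sequence (unsigned int gmask, int first_offset)
{
  int offset = 0;
  for (int regno = 0; regno <= 31; regno++)
    {
      if (!(gmask & (1u << regno)))
        continue;
      if (first_offset)
        {
          printf ("st.a r%d,[sp,%d]   ; pre-modify folds the adjustment\n",
                  regno, first_offset);
          first_offset = 0;
        }
      else
        printf ("st r%d,[sp,%d]\n", regno, offset);
      offset += UNITS_PER_WORD;
    }
}

int
main (void)
{
  print_save_sequence ((1u << 13) | (1u << 14) | (1u << 15), -12);
  return 0;
}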
*/ + +void +arc_expand_prologue (void) +{ + int size = get_frame_size (); + unsigned int gmask = cfun->machine->frame_info.gmask; + /* unsigned int frame_pointer_offset;*/ + unsigned int frame_size_to_allocate; + /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13. + Change the stack layout so that we rather store a high register with the + PRE_MODIFY, thus enabling more short insn generation.) */ + int first_offset = 0; + + size = ARC_STACK_ALIGN (size); + + /* Compute/get total frame size. */ + size = (!cfun->machine->frame_info.initialized + ? arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + if (flag_stack_usage_info) + current_function_static_stack_size = size; + + /* Keep track of frame size to be allocated. */ + frame_size_to_allocate = size; + + /* These cases shouldn't happen. Catch them now. */ + gcc_assert (!(size == 0 && gmask)); + + /* Allocate space for register arguments if this is a variadic function. */ + if (cfun->machine->frame_info.pretend_size != 0) + { + /* Ensure pretend_size is maximum of 8 * word_size. */ + gcc_assert (cfun->machine->frame_info.pretend_size <= 32); + + frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size); + frame_size_to_allocate -= cfun->machine->frame_info.pretend_size; + } + + /* The home-grown ABI says link register is saved first. */ + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); + + frame_move_inc (mem, ra, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + + } /* MUST_SAVE_RETURN_ADDR */ + + /* Save any needed call-saved regs (and call-used if this is an + interrupt handler) for ARCompact ISA. */ + if (cfun->machine->frame_info.reg_size) + { + first_offset = -cfun->machine->frame_info.reg_size; + /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */ + arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset); + frame_size_to_allocate -= cfun->machine->frame_info.reg_size; + } + + + /* Save frame pointer if needed. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-UNITS_PER_WORD + first_offset)); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + addr)); + frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + first_offset = 0; + frame_move (frame_pointer_rtx, stack_pointer_rtx); + } + + /* ??? We don't handle the case where the saved regs are more than 252 + bytes away from sp. This can be handled by decrementing sp once, saving + the regs, and then decrementing it again. The epilogue doesn't have this + problem as the `ld' insn takes reg+limm values (though it would be more + efficient to avoid reg+limm). */ + + frame_size_to_allocate -= first_offset; + /* Allocate the stack frame. */ + if (frame_size_to_allocate > 0) + frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate); + + /* Setup the gp register, if needed. */ + if (crtl->uses_pic_offset_table) + arc_finalize_pic (); +} + +/* Do any necessary cleanup after a function to restore stack, frame, + and regs. */ + +void +arc_expand_epilogue (int sibcall_p) +{ + int size = get_frame_size (); + enum arc_function_type fn_type = arc_compute_function_type (cfun); + + size = ARC_STACK_ALIGN (size); + size = (!cfun->machine->frame_info.initialized + ? 
arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + unsigned int pretend_size = cfun->machine->frame_info.pretend_size; + unsigned int frame_size; + unsigned int size_to_deallocate; + int restored; + int can_trust_sp_p = !cfun->calls_alloca; + int first_offset = 0; + int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0; + + size_to_deallocate = size; + + frame_size = size - (pretend_size + + cfun->machine->frame_info.reg_size + + cfun->machine->frame_info.extra_size); + + /* ??? There are lots of optimizations that can be done here. + EG: Use fp to restore regs if it's closer. + Maybe in time we'll do them all. For now, always restore regs from + sp, but don't restore sp if we don't have to. */ + + if (!can_trust_sp_p) + gcc_assert (frame_pointer_needed); + + /* Restore stack pointer to the beginning of saved register area for + ARCompact ISA. */ + if (frame_size) + { + if (frame_pointer_needed) + frame_move (stack_pointer_rtx, frame_pointer_rtx); + else + first_offset = frame_size; + size_to_deallocate -= frame_size; + } + else if (!can_trust_sp_p) + frame_stack_add (-frame_size); + + + /* Restore any saved registers. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx); + + frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr), + stack_pointer_rtx, 0); + size_to_deallocate -= UNITS_PER_WORD; + } + + /* Load blink after the calls to thunk calls in case of optimize size. */ + if (millicode_p) + { + int sibthunk_p = (!sibcall_p + && fn_type == ARC_FUNCTION_NORMAL + && !cfun->machine->frame_info.pretend_size); + + gcc_assert (!(cfun->machine->frame_info.gmask + & (FRAME_POINTER_MASK | RETURN_ADDR_MASK))); + arc_save_restore (stack_pointer_rtx, + cfun->machine->frame_info.gmask, + 1 + sibthunk_p, &first_offset); + if (sibthunk_p) + goto epilogue_done; + } + /* If we are to restore registers, and first_offset would require + a limm to be encoded in a PRE_MODIFY, yet we can add it with a + fast add to the stack pointer, do this now. */ + if ((!SMALL_INT (first_offset) + && cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? first_offset <= 0x800 + : satisfies_constraint_C2a (GEN_INT (first_offset)))) + /* Also do this if we have both gprs and return + address to restore, and they both would need a LIMM. */ + || (MUST_SAVE_RETURN_ADDR + && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2) + && cfun->machine->frame_info.gmask)) + { + frame_stack_add (first_offset); + first_offset = 0; + } + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + int ra_offs = cfun->machine->frame_info.reg_size + first_offset; + rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs); + + /* If the load of blink would need a LIMM, but we can add + the offset quickly to sp, do the latter. */ + if (!SMALL_INT (ra_offs >> 2) + && !cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? ra_offs <= 0x800 + : satisfies_constraint_C2a (GEN_INT (ra_offs)))) + { + size_to_deallocate -= ra_offs - first_offset; + first_offset = 0; + frame_stack_add (ra_offs); + ra_offs = 0; + addr = stack_pointer_rtx; + } + /* See if we can combine the load of the return address with the + final stack adjustment. + We need a separate load if there are still registers to + restore. We also want a separate load if the combined insn + would need a limm, but a separate load doesn't. 
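The chain of conditions above boils down to a simple preference: when the pending offset would drag a long immediate into the PRE_MODIFY used for the restores (or for the blink reload), but the same amount can be added to the stack pointer cheaply, adjust sp first and restore from small offsets afterwards. A boolean distillation follows; the range tests are left as parameters because they depend on SMALL_INT, constraint C2a and on -Os versus ARC700 tuning, and the helper name is hypothetical.

#include <stdbool.h>

/* Sketch of the epilogue heuristic described above.  */
static bool
bump_sp_before_restores (bool offset_needs_limm_in_premodify,
                         bool cheap_add_to_sp,
                         bool have_registers_to_restore)
{
  return offset_needs_limm_in_premodify
         && cheap_add_to_sp
         && have_registers_to_restore;
}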
*/ + if (ra_offs + && !cfun->machine->frame_info.gmask + && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2))) + { + addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr); + first_offset = 0; + size_to_deallocate -= cfun->machine->frame_info.reg_size; + } + else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD) + { + addr = gen_rtx_POST_INC (Pmode, addr); + size_to_deallocate = 0; + } + frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr); + } + + if (!millicode_p) + { + if (cfun->machine->frame_info.reg_size) + arc_save_restore (stack_pointer_rtx, + /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */ + cfun->machine->frame_info.gmask + & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset); + } + + + /* The rest of this function does the following: + ARCompact : handle epilogue_delay, restore sp (phase-2), return + */ + + /* Keep track of how much of the stack pointer we've restored. + It makes the following a lot more readable. */ + size_to_deallocate += first_offset; + restored = size - size_to_deallocate; + + if (size > restored) + frame_stack_add (size - restored); + /* Emit the return instruction. */ + if (sibcall_p == FALSE) + emit_jump_insn (gen_simple_return ()); + epilogue_done: + if (!TARGET_EPILOGUE_CFI) + { + rtx insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + RTX_FRAME_RELATED_P (insn) = 0; + } +} + +/* Return the offset relative to the stack pointer where the return address + is stored, or -1 if it is not stored. */ + +int +arc_return_slot_offset () +{ + struct arc_frame_info *afi = &cfun->machine->frame_info; + + return (afi->save_return_addr + ? afi->total_size - afi->pretend_size - afi->extra_size : -1); +} + +/* PIC */ + +/* Emit special PIC prologues and epilogues. */ +/* If the function has any GOTOFF relocations, then the GOTBASE + register has to be setup in the prologue + The instruction needed at the function start for setting up the + GOTBASE register is + add rdest, pc, + ---------------------------------------------------------- + The rtl to be emitted for this should be: + set (reg basereg) + (plus (reg pc) + (const (unspec (symref _DYNAMIC) 3))) + ---------------------------------------------------------- */ + +static void +arc_finalize_pic (void) +{ + rtx pat; + rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + + if (crtl->uses_pic_offset_table == 0) + return; + + gcc_assert (flag_pic != 0); + + pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC"); + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + + pat = gen_rtx_SET (VOIDmode, baseptr_rtx, pat); + + emit_insn (pat); +} + +/* !TARGET_BARREL_SHIFTER support. */ +/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what + kind of shift. */ + +void +emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2) +{ + rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2); + rtx pat + = ((shift4_operator (shift, SImode) ? gen_shift_si3 : gen_shift_si3_loop) + (op0, op1, op2, shift)); + emit_insn (pat); +} + +/* Output the assembler code for doing a shift. + We go to a bit of trouble to generate efficient code as the ARC601 only has + single bit shifts. This is taken from the h8300 port. We only have one + mode of shifting and can't access individual bytes like the h8300 can, so + this is greatly simplified (at the expense of not generating hyper- + efficient code). + + This function is not used if the variable shift insns are present. 
*/ + +/* FIXME: This probably can be done using a define_split in arc.md. + Alternately, generate rtx rather than output instructions. */ + +const char * +output_shift (rtx *operands) +{ + /* static int loopend_lab;*/ + rtx shift = operands[3]; + enum machine_mode mode = GET_MODE (shift); + enum rtx_code code = GET_CODE (shift); + const char *shift_one; + + gcc_assert (mode == SImode); + + switch (code) + { + case ASHIFT: shift_one = "add %0,%1,%1"; break; + case ASHIFTRT: shift_one = "asr %0,%1"; break; + case LSHIFTRT: shift_one = "lsr %0,%1"; break; + default: gcc_unreachable (); + } + + if (GET_CODE (operands[2]) != CONST_INT) + { + output_asm_insn ("and.f lp_count,%2, 0x1f", operands); + goto shiftloop; + } + else + { + int n; + + n = INTVAL (operands[2]); + + /* Only consider the lower 5 bits of the shift count. */ + n = n & 0x1f; + + /* First see if we can do them inline. */ + /* ??? We could get better scheduling & shorter code (using short insns) + by using splitters. Alas, that'd be even more verbose. */ + if (code == ASHIFT && n <= 9 && n > 2 + && dest_reg_operand (operands[4], SImode)) + { + output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands); + for (n -=3 ; n >= 3; n -= 3) + output_asm_insn ("add3 %0,%4,%0", operands); + if (n == 2) + output_asm_insn ("add2 %0,%4,%0", operands); + else if (n) + output_asm_insn ("add %0,%0,%0", operands); + } + else if (n <= 4) + { + while (--n >= 0) + { + output_asm_insn (shift_one, operands); + operands[1] = operands[0]; + } + } + /* See if we can use a rotate/and. */ + else if (n == BITS_PER_WORD - 1) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands); + break; + case ASHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands); + break; + case LSHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands); + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode)) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands); + break; + case ASHIFTRT : +#if 1 /* Need some scheduling comparisons. */ + output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t" + "sbc.f %0,%0,%4\n\trlc %0,%0", operands); +#endif + break; + case LSHIFTRT : +#if 1 + output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t" + "and %0,%0,1\n\trlc %0,%0", operands); +#endif + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 3 && code == ASHIFT) + output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0", + operands); + /* Must loop. */ + else + { + operands[2] = GEN_INT (n); + output_asm_insn ("mov.f lp_count, %2", operands); + + shiftloop: + { + output_asm_insn ("lpnz\t2f", operands); + output_asm_insn (shift_one, operands); + output_asm_insn ("nop", operands); + fprintf (asm_out_file, "2:\t%s end single insn loop\n", + ASM_COMMENT_START); + } + } + } + + return ""; +} + +/* Nested function support. */ + +/* Directly store VALUE into memory object BLOCK at OFFSET. 
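The inline expansions in output_shift lean on a few arithmetic identities: adding a value to itself is a left shift by one, and with a zeroed scratch register the emitted add3 dst,scratch,src acts as a shift by three and add2 as a shift by two, so 3 <= n <= 9 decomposes into chunks of three, then two, then one. A standalone check of that decomposition in plain C arithmetic (not the emitted assembly):

#include <assert.h>
#include <stdint.h>

/* Sketch: model the inline expansion of x << n for 3 <= n <= 9 using
   only "shift by 3", "shift by 2" and "add to itself" steps, mirroring
   the add3/add2/add sequence shown above.  */
static uint32_t
shift_left_by_chunks (uint32_t x, int n)
{
  uint32_t t = x << 3;          /* mov %4,0 ; add3 %0,%4,%1    */
  n -= 3;
  while (n >= 3)                /* add3 %0,%4,%0 per iteration */
    {
      t <<= 3;
      n -= 3;
    }
  if (n == 2)                   /* add2 %0,%4,%0               */
    t <<= 2;
  else if (n)                   /* add %0,%0,%0                */
    t += t;
  return t;
}

int
main (void)
{
  for (int n = 3; n <= 9; n++)
    assert (shift_left_by_chunks (0x1234567u, n) == 0x1234567u << n);
  return 0;
}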
*/ + +static void +emit_store_direct (rtx block, int offset, int value) +{ + emit_insn (gen_store_direct (adjust_address (block, SImode, offset), + force_reg (SImode, + gen_int_mode (value, SImode)))); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ +/* With potentially multiple shared objects loaded, and multiple stacks + present for multiple thereds where trampolines might reside, a simple + range check will likely not suffice for the profiler to tell if a callee + is a trampoline. We a speedier check by making the trampoline start at + an address that is not 4-byte aligned. + A trampoline looks like this: + + nop_s 0x78e0 +entry: + ld_s r12,[pcl,12] 0xd403 + ld r11,[pcl,12] 0x170c 700b + j_s [r12] 0x7c00 + nop_s 0x78e0 + + The fastest trampoline to execute for trampolines within +-8KB of CTX + would be: + add2 r11,pcl,s12 + j [limm] 0x20200f80 limm + and that would also be faster to write to the stack by computing the offset + from CTX to TRAMP at compile time. However, it would really be better to + get rid of the high cost of cache invalidation when generating trampolines, + which requires that the code part of trampolines stays constant, and + additionally either + - making sure that no executable code but trampolines is on the stack, + no icache entries linger for the area of the stack from when before the + stack was allocated, and allocating trampolines in trampoline-only + cache lines + or + - allocate trampolines fram a special pool of pre-allocated trampolines. */ + +static void +arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + + emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0); + emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c); + emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00); + emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr); + emit_move_insn (adjust_address (tramp, SImode, 16), cxt); + emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0))); +} + +/* Allow the profiler to easily distinguish trampolines from normal + functions. */ + +static rtx +arc_trampoline_adjust_address (rtx addr) +{ + return plus_constant (Pmode, addr, 2); +} + +/* This is set briefly to 1 when we output a ".as" address modifer, and then + reset when we output the scaled address. */ +static int output_scaled = 0; + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +/* In final.c:output_asm_insn: + 'l' : label + 'a' : address + 'c' : constant address if CONSTANT_ADDRESS_P + 'n' : negative + Here: + 'Z': log2(x+1)-1 + 'z': log2 + 'M': log2(~x) + '#': condbranch delay slot suffix + '*': jump delay slot suffix + '?' : nonjump-insn suffix for conditional execution or short instruction + '!' : jump / call suffix for conditional execution or short instruction + '`': fold constant inside unary o-perator, re-recognize, and emit. 
+ 'd' + 'D' + 'R': Second word + 'S' + 'B': Branch comparison operand - suppress sda reference + 'H': Most significant word + 'L': Least significant word + 'A': ASCII decimal representation of floating point value + 'U': Load/store update or scaling indicator + 'V': cache bypass indicator for volatile + 'P' + 'F' + '^' + 'O': Operator + 'o': original symbol - no @ prepending. */ + +void +arc_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case 'Z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 ); + else + output_operand_lossage ("invalid operand to %%Z code"); + + return; + + case 'z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%z code"); + + return; + + case 'M': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(~INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%M code"); + + return; + + case '#' : + /* Conditional branches depending on condition codes. + Note that this is only for branches that were known to depend on + condition codes before delay slot scheduling; + out-of-range brcc / bbit expansions should use '*'. + This distinction is important because of the different + allowable delay slot insns and the output of the delay suffix + for TARGET_AT_DBR_COND_EXEC. */ + case '*' : + /* Unconditional branches / branches not depending on condition codes. + This could also be a CALL_INSN. + Output the appropriate delay slot suffix. */ + if (final_sequence && XVECLEN (final_sequence, 0) != 1) + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx delay = XVECEXP (final_sequence, 0, 1); + + /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */ + if (INSN_DELETED_P (delay)) + return; + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + fputs (INSN_FROM_TARGET_P (delay) ? ".d" + : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d" + : get_attr_type (jump) == TYPE_RETURN && code == '#' ? "" + : ".nd", + file); + else + fputs (".d", file); + } + return; + case '?' : /* with leading "." */ + case '!' : /* without leading "." */ + /* This insn can be conditionally executed. See if the ccfsm machinery + says it should be conditionalized. + If it shouldn't, we'll check the compact attribute if this insn + has a short variant, which may be used depending on code size and + alignment considerations. */ + if (current_insn_predicate) + arc_ccfsm_current.cc + = get_arc_condition_code (current_insn_predicate); + if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)) + { + /* Is this insn in a delay slot sequence? */ + if (!final_sequence || XVECLEN (final_sequence, 0) < 2 + || current_insn_predicate + || CALL_P (XVECEXP (final_sequence, 0, 0)) + || simplejump_p (XVECEXP (final_sequence, 0, 0))) + { + /* This insn isn't in a delay slot sequence, or conditionalized + independently of its position in a delay slot. */ + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + /* If this is a jump, there are still short variants. However, + only beq_s / bne_s have the same offset range as b_s, + and the only short conditional returns are jeq_s and jne_s. */ + if (code == '!' + && (arc_ccfsm_current.cc == ARC_CC_EQ + || arc_ccfsm_current.cc == ARC_CC_NE + || 0 /* FIXME: check if branch in 7 bit range. */)) + output_short_suffix (file); + } + else if (code == '!') /* Jump with delay slot. 
*/ + fputs (arc_condition_codes[arc_ccfsm_current.cc], file); + else /* An Instruction in a delay slot of a jump or call. */ + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx insn = XVECEXP (final_sequence, 0, 1); + + /* If the insn is annulled and is from the target path, we need + to inverse the condition test. */ + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + { + if (INSN_FROM_TARGET_P (insn)) + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]); + else + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; + } + else + /* This insn is executed for either path, so don't + conditionalize it at all. */ + output_short_suffix (file); + + } + } + else + output_short_suffix (file); + return; + case'`': + /* FIXME: fold constant inside unary operator, re-recognize, and emit. */ + gcc_unreachable (); + case 'd' : + fputs (arc_condition_codes[get_arc_condition_code (x)], file); + return; + case 'D' : + fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE + (get_arc_condition_code (x))], + file); + return; + case 'R' : + /* Write second word of DImode or DFmode reference, + register or memory. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)+1], file); + else if (GET_CODE (x) == MEM) + { + fputc ('[', file); + + /* Handle possible auto-increment. For PRE_INC / PRE_DEC / + PRE_MODIFY, we will have handled the first word already; + For POST_INC / POST_DEC / POST_MODIFY, the access to the + first word will be done later. In either case, the access + to the first word will do the modify, and we only have + to add an offset of four here. */ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC + || GET_CODE (XEXP (x, 0)) == PRE_MODIFY + || GET_CODE (XEXP (x, 0)) == POST_INC + || GET_CODE (XEXP (x, 0)) == POST_DEC + || GET_CODE (XEXP (x, 0)) == POST_MODIFY) + output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4)); + else if (output_scaled) + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + output_address (plus_constant (Pmode, XEXP (addr, 0), + ((INTVAL (XEXP (addr, 1)) + 4) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (plus_constant (Pmode, XEXP (x, 0), 4)); + fputc (']', file); + } + else + output_operand_lossage ("invalid operand to %%R code"); + return; + case 'S' : + /* FIXME: remove %S option. */ + break; + case 'B' /* Branch or other LIMM ref - must not use sda references. */ : + if (CONSTANT_P (x)) + { + output_addr_const (file, x); + return; + } + break; + case 'H' : + case 'L' : + if (GET_CODE (x) == REG) + { + /* L = least significant word, H = most significant word. */ + if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L')) + fputs (reg_names[REGNO (x)], file); + else + fputs (reg_names[REGNO (x)+1], file); + } + else if (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE) + { + rtx first, second; + + split_double (x, &first, &second); + + if((WORDS_BIG_ENDIAN) == 0) + fprintf (file, "0x%08lx", + code == 'L' ? INTVAL (first) : INTVAL (second)); + else + fprintf (file, "0x%08lx", + code == 'L' ? 
INTVAL (second) : INTVAL (first)); + + + } + else + output_operand_lossage ("invalid operand to %%H/%%L code"); + return; + case 'A' : + { + char str[30]; + + gcc_assert (GET_CODE (x) == CONST_DOUBLE + && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT); + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1); + fprintf (file, "%s", str); + return; + } + case 'U' : + /* Output a load/store with update indicator if appropriate. */ + if (GET_CODE (x) == MEM) + { + rtx addr = XEXP (x, 0); + switch (GET_CODE (addr)) + { + case PRE_INC: case PRE_DEC: case PRE_MODIFY: + fputs (".a", file); break; + case POST_INC: case POST_DEC: case POST_MODIFY: + fputs (".ab", file); break; + case PLUS: + /* Are we using a scaled index? */ + if (GET_CODE (XEXP (addr, 0)) == MULT) + fputs (".as", file); + /* Can we use a scaled offset? */ + else if (CONST_INT_P (XEXP (addr, 1)) + && GET_MODE_SIZE (GET_MODE (x)) > 1 + && (!(INTVAL (XEXP (addr, 1)) + & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3)) + /* Does it make a difference? */ + && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)), + GET_MODE_SIZE (GET_MODE (x)) - 2, 0)) + { + fputs (".as", file); + output_scaled = 1; + } + break; + case REG: + break; + default: + gcc_assert (CONSTANT_P (addr)); break; + } + } + else + output_operand_lossage ("invalid operand to %%U code"); + return; + case 'V' : + /* Output cache bypass indicator for a load/store insn. Volatile memory + refs are defined to use the cache bypass mechanism. */ + if (GET_CODE (x) == MEM) + { + if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET ) + fputs (".di", file); + } + else + output_operand_lossage ("invalid operand to %%V code"); + return; + /* plt code. */ + case 'P': + case 0 : + /* Do nothing special. */ + break; + case 'F': + fputs (reg_names[REGNO (x)]+1, file); + return; + case '^': + /* This punctuation character is needed because label references are + printed in the output template using %l. This is a front end + character, and when we want to emit a '@' before it, we have to use + this '^'. */ + + fputc('@',file); + return; + case 'O': + /* Output an operator. */ + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", file); return; + case SS_PLUS: fputs ("adds", file); return; + case AND: fputs ("and", file); return; + case IOR: fputs ("or", file); return; + case XOR: fputs ("xor", file); return; + case MINUS: fputs ("sub", file); return; + case SS_MINUS: fputs ("subs", file); return; + case ASHIFT: fputs ("asl", file); return; + case ASHIFTRT: fputs ("asr", file); return; + case LSHIFTRT: fputs ("lsr", file); return; + case ROTATERT: fputs ("ror", file); return; + case MULT: fputs ("mpy", file); return; + case ABS: fputs ("abs", file); return; /* Unconditional. */ + case NEG: fputs ("neg", file); return; + case SS_NEG: fputs ("negs", file); return; + case NOT: fputs ("not", file); return; /* Unconditional. */ + case ZERO_EXTEND: + fputs ("ext", file); /* bmsk allows predication. */ + goto size_suffix; + case SIGN_EXTEND: /* Unconditional. 
*/ + fputs ("sex", file); + size_suffix: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: fputs ("b", file); return; + case HImode: fputs ("w", file); return; + default: break; + } + break; + case SS_TRUNCATE: + if (GET_MODE (x) != HImode) + break; + fputs ("sat16", file); + default: break; + } + output_operand_lossage ("invalid operand to %%O code"); return; + case 'o': + if (GET_CODE (x) == SYMBOL_REF) + { + assemble_name (file, XSTR (x, 0)); + return; + } + break; + case '&': + if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason) + fprintf (file, "; unalign: %d", cfun->machine->unalign); + return; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + fputc ('[', file); + + switch (GET_CODE (addr)) + { + case PRE_INC: case POST_INC: + output_address (plus_constant (Pmode, XEXP (addr, 0), size)); break; + case PRE_DEC: case POST_DEC: + output_address (plus_constant (Pmode, XEXP (addr, 0), -size)); + break; + case PRE_MODIFY: case POST_MODIFY: + output_address (XEXP (addr, 1)); break; + case PLUS: + if (output_scaled) + { + output_address (plus_constant (Pmode, XEXP (addr, 0), + (INTVAL (XEXP (addr, 1)) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (addr); + break; + default: + if (flag_pic && CONSTANT_ADDRESS_P (addr)) + arc_output_pic_addr_const (file, addr, code); + else + output_address (addr); + break; + } + fputc (']', file); + break; + } + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + default : + if (flag_pic) + arc_output_pic_addr_const (file, x, code); + else + { + /* FIXME: Dirty way to handle @var@sda+const. Shd be handled + with asm_output_symbol_ref */ + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + x = XEXP (x, 0); + output_addr_const (file, XEXP (x, 0)); + if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0))) + fprintf (file, "@sda"); + + if (GET_CODE (XEXP (x, 1)) != CONST_INT + || INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + output_addr_const (file, XEXP (x, 1)); + } + else + output_addr_const (file, x); + } + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x)) + fprintf (file, "@sda"); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. 
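When the 'U' code has chosen a scaled (.as) address, the printed displacement is the byte offset divided by the access size, which is what the right shift in the MEM case above does. A one-line helper makes the relation explicit; scaled_displacement is a hypothetical name used only for illustration.

/* Sketch: an ".as" (scaled) address encodes the displacement in units
   of the access size, so a byte offset is shifted right by 1 for
   2-byte and by 2 for 4-byte accesses before printing.  For example,
   a 4-byte access 32 bytes past the base prints as offset 8 together
   with the .as suffix.  */
static long
scaled_displacement (long byte_offset, int access_size)
{
  return byte_offset >> (access_size == 2 ? 1 : 2);
}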
*/ + +void +arc_print_operand_address (FILE *file , rtx addr) +{ + register rtx base, index = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + output_addr_const (file, addr); + if (SYMBOL_REF_SMALL_P (addr)) + fprintf (file, "@sda"); + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == MULT) + index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1); + else if (CONST_INT_P (XEXP (addr, 0))) + index = XEXP (addr, 0), base = XEXP (addr, 1); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + + gcc_assert (OBJECT_P (base)); + arc_print_operand_address (file, base); + if (CONSTANT_P (base) && CONST_INT_P (index)) + fputc ('+', file); + else + fputc (',', file); + gcc_assert (OBJECT_P (index)); + arc_print_operand_address (file, index); + break; + case CONST: + { + rtx c = XEXP (addr, 0); + + gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF); + gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT); + + output_address(XEXP(addr,0)); + + break; + } + case PRE_INC : + case PRE_DEC : + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. */ + gcc_unreachable (); + break; + default : + if (flag_pic) + arc_output_pic_addr_const (file, addr, 0); + else + output_addr_const (file, addr); + break; + } +} + +/* Called via walk_stores. DATA points to a hash table we can use to + establish a unique SYMBOL_REF for each counter, which corresponds to + a caller-callee pair. + X is a store which we want to examine for an UNSPEC_PROF, which + would be an address loaded into a register, or directly used in a MEM. + If we found an UNSPEC_PROF, if we encounter a new counter the first time, + write out a description and a data allocation for a 32 bit counter. + Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance. */ + +static void +write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data) +{ + rtx *srcp, src; + htab_t htab = (htab_t) data; + rtx *slot; + + if (GET_CODE (x) != SET) + return; + srcp = &SET_SRC (x); + if (MEM_P (*srcp)) + srcp = &XEXP (*srcp, 0); + else if (MEM_P (SET_DEST (x))) + srcp = &XEXP (SET_DEST (x), 0); + src = *srcp; + if (GET_CODE (src) != CONST) + return; + src = XEXP (src, 0); + if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF) + return; + + gcc_assert (XVECLEN (src, 0) == 3); + if (!htab_elements (htab)) + { + output_asm_insn (".section .__arc_profile_desc, \"a\"\n" + "\t.long %0 + 1\n", + &XVECEXP (src, 0, 0)); + } + slot = (rtx *) htab_find_slot (htab, src, INSERT); + if (*slot == HTAB_EMPTY_ENTRY) + { + static int count_nr; + char buf[24]; + rtx count; + + *slot = src; + sprintf (buf, "__prof_count%d", count_nr++); + count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf)); + XVECEXP (src, 0, 2) = count; + output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n" + "\t.long\t%1\n" + "\t.section\t.__arc_profile_counters, \"aw\"\n" + "\t.type\t%o2, @object\n" + "\t.size\t%o2, 4\n" + "%o2:\t.zero 4", + &XVECEXP (src, 0, 0)); + *srcp = count; + } + else + *srcp = XVECEXP (*slot, 0, 2); +} + +/* Hash function for UNSPEC_PROF htab. Use both the caller's name and + the callee's name (if known). */ + +static hashval_t +unspec_prof_hash (const void *x) +{ + const_rtx u = (const_rtx) x; + const_rtx s1 = XVECEXP (u, 0, 1); + + return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0)) + ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0)); +} + +/* Equality function for UNSPEC_PROF htab. 
Two pieces of UNSPEC_PROF rtl + shall refer to the same counter if both caller name and callee rtl + are identical. */ + +static int +unspec_prof_htab_eq (const void *x, const void *y) +{ + const_rtx u0 = (const_rtx) x; + const_rtx u1 = (const_rtx) y; + const_rtx s01 = XVECEXP (u0, 0, 1); + const_rtx s11 = XVECEXP (u1, 0, 1); + + return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0), + XSTR (XVECEXP (u1, 0, 0), 0)) + && rtx_equal_p (s01, s11)); +} + +/* Conditional execution support. + + This is based on the ARM port but for now is much simpler. + + A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the + actions of PRINT_OPERAND. The patterns in the .md file for the branch + insns also have a hand in this. */ +/* The way we leave dealing with non-anulled or annull-false delay slot + insns to the consumer is awkward. */ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: don't output this insn + 2: don't output this insn + 3: make insns conditional + 4: make insns conditional + 5: make insn conditional (only for outputting anulled delay slot insns) + + special value for cfun->machine->uid_ccfsm_state: + 6: return with but one insn before it since function start / call + + State transitions (state->state by whom, under what condition): + 0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over + some instructions. + 0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed + by zero or more non-jump insns and an unconditional branch with + the same target label as the condbranch. + 1 -> 3 branch patterns, after having not output the conditional branch + 2 -> 4 branch patterns, after having not output the conditional branch + 0 -> 5 branch patterns, for anulled delay slot insn. + 3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to + arc_ccfsm_target_label). + 4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached + 3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns. + 5 -> 0 when outputting the delay slot insn + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. + + We also handle separating branches from sets of the condition code. + This is done here because knowledge of the ccfsm state is required, + we may not be outputting the branch. */ + +/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current, + before letting final output INSN. */ + +static void +arc_ccfsm_advance (rtx insn, struct arc_ccfsm *state) +{ + /* BODY will hold the body of INSN. */ + register rtx body; + + /* This will be 1 if trying to repeat the trick (ie: do the `else' part of + an if/then/else), and things need to be reversed. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* Type of the jump_insn. Brcc insns don't affect ccfsm changes, + since they don't rely on a cmp preceding the. 
*/ + enum attr_type jump_insn_type; + + /* Allow -mdebug-ccfsm to turn this off so we can see how well it does. + We can't do this in macro FINAL_PRESCAN_INSN because its called from + final_scan_insn which has `optimize' as a local. */ + if (optimize < 2 || TARGET_NO_COND_EXEC) + return; + + /* Ignore notes and labels. */ + if (!INSN_P (insn)) + return; + body = PATTERN (insn); + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (state->state == 4) + { + if (insn == state->target_insn) + { + state->target_insn = NULL; + state->state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. Or in other words "we've done the + `then' part, see if we can do the `else' part." */ + if (state->state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + { + /* ??? Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (GET_CODE (body) == SIMPLE_RETURN) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + } + else + return; + } + else + return; + } + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + return; + + /* We can't predicate BRCC or loop ends. + Also, when generating PIC code, and considering a medium range call, + we can't predicate the call. */ + jump_insn_type = get_attr_type (insn); + if (jump_insn_type == TYPE_BRCC + || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT + || jump_insn_type == TYPE_LOOP_END + || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn))) + return; + + /* This jump might be paralleled with a clobber of the condition codes, + the jump should always come first. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped = 0, fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + /* Nonzero if next insn must be the target label. */ + int next_must_be_target_label_p; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN) + seeking_return = 1; + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN) + { + seeking_return = 1; + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* If this is a non-annulled branch with a delay slot, there is + no need to conditionalize the delay slot. */ + if (NEXT_INSN (PREV_INSN (insn)) != insn + && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn)) + { + this_insn = NEXT_INSN (this_insn); + gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn))) + == NEXT_INSN (this_insn)); + } + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0, next_must_be_target_label_p = FALSE; + !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED; + insns_skipped++) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + if (next_must_be_target_label_p) + { + if (GET_CODE (this_insn) == BARRIER) + continue; + if (GET_CODE (this_insn) == CODE_LABEL + && this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + } + + scanbody = PATTERN (this_insn); + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + next_must_be_target_label_p = TRUE; + break; + + case CALL_INSN: + /* Can handle a call insn if there are no insns after it. + IE: The next "insn" is the target label. We don't have to + worry about delay slots as such insns are SEQUENCE's inside + INSN's. ??? It is possible to handle such insns though. */ + if (get_attr_cond (this_insn) == COND_CANUSE) + next_must_be_target_label_p = TRUE; + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* ??? Probably, the test for the SET and the PC are + unnecessary. */ + + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + else if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + else if (GET_CODE (scanbody) == SIMPLE_RETURN + && seeking_return) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + break; + + case INSN: + /* We can only do this with insns that can use the condition + codes (and don't set them). 
*/ + if (GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + /* We can't handle other insns like sequences. */ + else + fail = TRUE; + break; + + default: + break; + } + } + + if (succeed) + { + if ((!seeking_return) && (state->state == 1 || reverse)) + state->target_label = CODE_LABEL_NUMBER (label); + else if (seeking_return || state->state == 2) + { + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + + gcc_assert (!this_insn || + (GET_CODE (this_insn) != BARRIER + && GET_CODE (this_insn) != CODE_LABEL)); + } + if (!this_insn) + { + /* Oh dear! we ran off the end, give up. */ + extract_insn_cached (insn); + state->state = 0; + state->target_insn = NULL; + return; + } + state->target_insn = this_insn; + } + else + gcc_unreachable (); + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + { + state->cond = XEXP (SET_SRC (body), 0); + state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0)); + } + + if (reverse || then_not_else) + state->cc = ARC_INVERSE_CONDITION_CODE (state->cc); + } + + /* Restore recog_operand. Getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call; since the insn has been recognized already we + call insn_extract direct. */ + extract_insn_cached (insn); + } +} + +/* Record that we are currently outputting label NUM with prefix PREFIX. + It it's the label we're looking for, reset the ccfsm machinery. + + Called from ASM_OUTPUT_INTERNAL_LABEL. */ + +static void +arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state) +{ + if (state->state == 3 && state->target_label == num + && !strcmp (prefix, "L")) + { + state->state = 0; + state->target_insn = NULL_RTX; + } +} + +/* We are considering a conditional branch with the condition COND. + Check if we want to conditionalize a delay slot insn, and if so modify + the ccfsm state accordingly. + REVERSE says branch will branch when the condition is false. */ +void +arc_ccfsm_record_condition (rtx cond, bool reverse, rtx jump, + struct arc_ccfsm *state) +{ + rtx seq_insn = NEXT_INSN (PREV_INSN (jump)); + if (!state) + state = &arc_ccfsm_current; + + gcc_assert (state->state == 0); + if (seq_insn != jump) + { + rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1); + + if (!INSN_DELETED_P (insn) + && INSN_ANNULLED_BRANCH_P (jump) + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn))) + { + state->cond = cond; + state->cc = get_arc_condition_code (cond); + if (!reverse) + arc_ccfsm_current.cc + = ARC_INVERSE_CONDITION_CODE (state->cc); + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + gcc_assert ((INSN_FROM_TARGET_P (insn) + ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc) + == get_arc_condition_code (XEXP (pat, 0))); + else + state->state = 5; + } + } +} + +/* Update *STATE as we would when we emit INSN. */ + +static void +arc_ccfsm_post_advance (rtx insn, struct arc_ccfsm *state) +{ + enum attr_type type; + + if (LABEL_P (insn)) + arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state); + else if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && ((type = get_attr_type (insn)) == TYPE_BRANCH + || (type == TYPE_UNCOND_BRANCH + /* ??? Maybe should also handle TYPE_RETURN here, + but we don't have a testcase for that. 
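Stripped of the RTL details, the scan in arc_ccfsm_advance above asks two questions about the instructions a conditional branch would skip: are there few enough of them, and can every one of them be predicated, that is, execute under the condition codes without clobbering them? A toy model of that test; the MAX_SKIP bound and the helper are illustrative only, not the port's actual MAX_INSNS_SKIPPED or insn representation.

#include <stdbool.h>

#define MAX_SKIP 3   /* stand-in bound, not the real MAX_INSNS_SKIPPED */

/* Sketch: a conditional branch over N_SKIPPED instructions can be
   deleted in favour of predication only if the skipped run is short
   enough and every instruction in it can execute under the condition
   codes without clobbering them.  */
static bool
branch_can_be_predicated (const bool predicable[], int n_skipped)
{
  if (n_skipped > MAX_SKIP)
    return false;
  for (int i = 0; i < n_skipped; i++)
    if (!predicable[i])
      return false;
  return true;
}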
*/ + && ARC_CCFSM_BRANCH_DELETED_P (state)))) + { + if (ARC_CCFSM_BRANCH_DELETED_P (state)) + ARC_CCFSM_RECORD_BRANCH_DELETED (state); + else + { + rtx src = SET_SRC (PATTERN (insn)); + arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx, + insn, state); + } + } + else if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; +} + +/* Return true if the current insn, which is a conditional branch, is to be + deleted. */ + +bool +arc_ccfsm_branch_deleted_p (void) +{ + return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current); +} + +/* Record a branch isn't output because subsequent insns can be + conditionalized. */ + +void +arc_ccfsm_record_branch_deleted (void) +{ + ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current); +} + +/* During insn output, indicate if the current insn is predicated. */ + +bool +arc_ccfsm_cond_exec_p (void) +{ + return (cfun->machine->prescan_initialized + && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)); +} + +/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC, + and look inside SEQUENCEs. */ + +static rtx +arc_next_active_insn (rtx insn, struct arc_ccfsm *statep) +{ + rtx pat; + + do + { + if (statep) + arc_ccfsm_post_advance (insn, statep); + insn = NEXT_INSN (insn); + if (!insn || BARRIER_P (insn)) + return NULL_RTX; + if (statep) + arc_ccfsm_advance (insn, statep); + } + while (NOTE_P (insn) + || (cfun->machine->arc_reorg_started + && LABEL_P (insn) && !label_to_alignment (insn)) + || (NONJUMP_INSN_P (insn) + && (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER))); + if (!LABEL_P (insn)) + { + gcc_assert (INSN_P (insn)); + pat = PATTERN (insn); + if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC) + return NULL_RTX; + if (GET_CODE (pat) == SEQUENCE) + return XVECEXP (pat, 0, 0); + } + return insn; +} + +/* When deciding if an insn should be output short, we want to know something + about the following insns: + - if another insn follows which we know we can output as a short insn + before an alignment-sensitive point, we can output this insn short: + the decision about the eventual alignment can be postponed. + - if a to-be-aligned label comes next, we should output this insn such + as to get / preserve 4-byte alignment. + - if a likely branch without delay slot insn, or a call with an immediately + following short insn comes next, we should out output this insn such as to + get / preserve 2 mod 4 unalignment. + - do the same for a not completely unlikely branch with a short insn + following before any other branch / label. + - in order to decide if we are actually looking at a branch, we need to + call arc_ccfsm_advance. + - in order to decide if we are looking at a short insn, we should know + if it is conditionalized. To a first order of approximation this is + the case if the state from arc_ccfsm_advance from before this insn + indicates the insn is conditionalized. However, a further refinement + could be to not conditionalize an insn if the destination register(s) + is/are dead in the non-executed case. */ +/* Return non-zero if INSN should be output as a short insn. UNALIGN is + zero if the current insn is aligned to a 4-byte-boundary, two otherwise. + If CHECK_ATTR is greater than 0, check the iscompact attribute first. 
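The 2-mod-4 bookkeeping described above can be modelled in isolation: emitting a 16-bit ("_s") instruction flips the unalignment, a 32-bit one preserves it, which is exactly the unalign ^= 2 toggle that output_short_suffix performs below. A stand-alone sketch (the real decision also consults the iscompact attribute and force_short_suffix, both omitted here):

#include <stdio.h>

/* Track the 2-mod-4 unalignment of the instruction stream: 0 means the
   next insn starts on a 4-byte boundary, 2 means it is offset by 2.  */
static int unalign = 0;

static void
emit_insn_model (int short_p)
{
  if (short_p)
    {
      printf ("insn_s\t; 2 bytes, unalign %d -> %d\n", unalign, unalign ^ 2);
      unalign ^= 2;     /* the toggle output_short_suffix applies */
    }
  else
    printf ("insn\t; 4 bytes, unalign stays %d\n", unalign);
}

int
main (void)
{
  emit_insn_model (1);   /* _s: 0 -> 2        */
  emit_insn_model (0);   /* long insn keeps 2 */
  emit_insn_model (1);   /* _s: 2 -> 0        */
  return 0;
}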
*/ + +int +arc_verify_short (rtx insn, int, int check_attr) +{ + enum attr_iscompact iscompact; + struct machine_function *machine; + + if (check_attr > 0) + { + iscompact = get_attr_iscompact (insn); + if (iscompact == ISCOMPACT_FALSE) + return 0; + } + machine = cfun->machine; + + if (machine->force_short_suffix >= 0) + return machine->force_short_suffix; + + return (get_attr_length (insn) & 2) != 0; +} + +/* When outputting an instruction (alternative) that can potentially be short, + output the short suffix if the insn is in fact short, and update + cfun->machine->unalign accordingly. */ + +static void +output_short_suffix (FILE *file) +{ + rtx insn = current_output_insn; + + if (arc_verify_short (insn, cfun->machine->unalign, 1)) + { + fprintf (file, "_s"); + cfun->machine->unalign ^= 2; + } + /* Restore recog_operand. */ + extract_insn_cached (insn); +} + +/* Implement FINAL_PRESCAN_INSN. */ + +void +arc_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); + + /* Output a nop if necessary to prevent a hazard. + Don't do this for delay slots: inserting a nop would + alter semantics, and the only time we would find a hazard is for a + call function result - and in that case, the hazard is spurious to + start with. */ + if (PREV_INSN (insn) + && PREV_INSN (NEXT_INSN (insn)) == insn + && arc_hazard (prev_real_insn (insn), insn)) + { + current_output_insn = + emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn))); + final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL); + current_output_insn = insn; + } + /* Restore extraction data which might have been clobbered by arc_hazard. */ + extract_constrain_insn_cached (insn); + + if (!cfun->machine->prescan_initialized) + { + /* Clear lingering state from branch shortening. */ + memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current); + cfun->machine->prescan_initialized = 1; + } + arc_ccfsm_advance (insn, &arc_ccfsm_current); + + cfun->machine->size_reason = 0; +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. + + All eliminations are permissible. If we need a frame + pointer, we must eliminate ARG_POINTER_REGNUM into + FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + +static bool +arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required (); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +arc_initial_elimination_offset (int from, int to) +{ + if (! 
cfun->machine->frame_info.initialized) + arc_compute_frame_size (get_frame_size ()); + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + { + return (cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size); + } + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + { + return (cfun->machine->frame_info.total_size + - cfun->machine->frame_info.pretend_size); + } + + if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM)) + { + return (cfun->machine->frame_info.total_size + - (cfun->machine->frame_info.pretend_size + + cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size)); + } + + gcc_unreachable (); +} + +static bool +arc_frame_pointer_required (void) +{ + return cfun->calls_alloca; +} + + +/* Return the destination address of a branch. */ + +int +branch_dest (rtx branch) +{ + rtx pat = PATTERN (branch); + rtx dest = (GET_CODE (pat) == PARALLEL + ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1); + + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + + return INSN_ADDRESSES (dest_uid); +} + + +/* Implement TARGET_ENCODE_SECTION_INFO hook. */ + +static void +arc_encode_section_info (tree decl, rtx rtl, int first) +{ + /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION. + This clears machine specific flags, so has to come first. */ + default_encode_section_info (decl, rtl, first); + + /* Check if it is a function, and whether it has the + [long/medium/short]_call attribute specified. */ + if (TREE_CODE (decl) == FUNCTION_DECL) + { + rtx symbol = XEXP (rtl, 0); + int flags = SYMBOL_REF_FLAGS (symbol); + + tree attr = (TREE_TYPE (decl) != error_mark_node + ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE); + tree long_call_attr = lookup_attribute ("long_call", attr); + tree medium_call_attr = lookup_attribute ("medium_call", attr); + tree short_call_attr = lookup_attribute ("short_call", attr); + + if (long_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_LONG_CALL; + else if (medium_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_MEDIUM_CALL; + else if (short_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_SHORT_CALL; + + SYMBOL_REF_FLAGS (symbol) = flags; + } +} + +/* This is how to output a definition of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (cfun) + arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current); + default_internal_label (stream, prefix, labelno); +} + +/* Set the cpu type and print out other fancy things, + at the top of the file. */ + +static void arc_file_start (void) +{ + default_file_start (); + fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string); +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +arc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ + switch (code) + { + /* Small integers are as cheap as registers. */ + case CONST_INT: + { + bool nolimm = false; /* Can we do without long immediate? */ + bool fast = false; /* Is the result available immediately? */ + bool condexec = false; /* Does this allow conditiobnal execution? 
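The three register eliminations handled by arc_initial_elimination_offset above reduce to sums over the frame_info fields. A stand-alone sketch with invented sizes (the real numbers come from arc_compute_frame_size; the struct below is only a stand-in for cfun->machine->frame_info):

#include <stdio.h>

struct frame_info_model
{
  int total_size;    /* whole frame                         */
  int extra_size;    /* return address / fp save area       */
  int pretend_size;  /* anonymous args pushed by the caller */
  int reg_size;      /* callee-saved register save area     */
};

int
main (void)
{
  struct frame_info_model f = { 64, 8, 0, 16 };

  int ap_to_fp = f.extra_size + f.reg_size;
  int ap_to_sp = f.total_size - f.pretend_size;
  int fp_to_sp = f.total_size
                 - (f.pretend_size + f.extra_size + f.reg_size);

  printf ("ARG_POINTER   -> FRAME_POINTER: %d\n", ap_to_fp);  /* 24 */
  printf ("ARG_POINTER   -> STACK_POINTER: %d\n", ap_to_sp);  /* 64 */
  printf ("FRAME_POINTER -> STACK_POINTER: %d\n", fp_to_sp);  /* 40 */
  return 0;
}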
*/ + bool compact = false; /* Is a 16 bit opcode available? */ + /* CONDEXEC also implies that we can have an unconditional + 3-address operation. */ + + nolimm = compact = condexec = false; + if (UNSIGNED_INT6 (INTVAL (x))) + nolimm = condexec = compact = true; + else + { + if (SMALL_INT (INTVAL (x))) + nolimm = fast = true; + switch (outer_code) + { + case AND: /* bclr, bmsk, ext[bw] */ + if (satisfies_constraint_Ccp (x) /* bclr */ + || satisfies_constraint_C1p (x) /* bmsk */) + nolimm = fast = condexec = compact = true; + break; + case IOR: /* bset */ + if (satisfies_constraint_C0p (x)) /* bset */ + nolimm = fast = condexec = compact = true; + break; + case XOR: + if (satisfies_constraint_C0p (x)) /* bxor */ + nolimm = fast = condexec = true; + break; + case SET: + if (satisfies_constraint_Crr (x)) /* ror b,u6 */ + nolimm = true; + default: + break; + } + } + /* FIXME: Add target options to attach a small cost if + condexec / compact is not true. */ + if (nolimm) + { + *total = 0; + return true; + } + } + /* FALLTHRU */ + + /* 4 byte values can be fetched as immediate constants - + let's give that the cost of an extra insn. */ + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + + if (TARGET_DPFP) + { + *total = COSTS_N_INSNS (1); + return true; + } + /* FIXME: correct the order of high,low */ + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high)) + + !SMALL_INT (INTVAL (low))); + return true; + } + + /* Encourage synth_mult to find a synthetic multiply when reasonable. + If we need more than 12 insns to do a multiply, then go out-of-line, + since the call overhead will be < 10% of the cost of the multiply. */ + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (TARGET_BARREL_SHIFTER) + { + /* If we want to shift a constant, we need a LIMM. */ + /* ??? when the optimizers want to know if a constant should be + hoisted, they ask for the cost of the constant. OUTER_CODE is + insufficient context for shifts since we don't know which operand + we are looking at. */ + if (CONSTANT_P (XEXP (x, 0))) + { + *total += (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 1), (enum rtx_code) code, 0, speed)); + return true; + } + *total = COSTS_N_INSNS (1); + } + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = COSTS_N_INSNS (16); + else + { + *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); + /* ??? want_to_gcse_p can throw negative shift counts at us, + and then panics when it gets a negative cost as result. + Seen for gcc.c-torture/compile/20020710-1.c -Os . */ + if (*total < 0) + *total = 0; + } + return false; + + case DIV: + case UDIV: + if (speed) + *total = COSTS_N_INSNS(30); + else + *total = COSTS_N_INSNS(1); + return false; + + case MULT: + if ((TARGET_DPFP && GET_MODE (x) == DFmode)) + *total = COSTS_N_INSNS (1); + else if (speed) + *total= arc_multcost; + /* We do not want synth_mult sequences when optimizing + for size. 
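The CONST_INT case at the top of arc_rtx_costs is essentially a classification by encoding: u6 immediates cost nothing anywhere, s12 immediates avoid a long immediate word, and everything else needs a limm. A stand-alone sketch assuming those usual ARC ranges (the bclr/bmsk/bset constraint refinements are left out):

#include <stdio.h>

/* Rough model of the CONST_INT cost logic: u6 immediates fit everywhere
   (including conditional and 16-bit encodings), s12 immediates avoid a
   long immediate, anything else needs a 32-bit limm word.  */
static const char *
classify_imm (long v)
{
  if (v >= 0 && v <= 63)
    return "u6: free, condexec and 16-bit forms available";
  if (v >= -2048 && v <= 2047)
    return "s12: free, but no compact/conditional encoding";
  return "needs limm: costs roughly one extra insn";
}

int
main (void)
{
  long tests[] = { 5, 63, 64, -1, -2048, 4096 };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%6ld -> %s\n", tests[i], classify_imm (tests[i]));
  return 0;
}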
*/ + else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + return false; + case PLUS: + if (GET_CODE (XEXP (x, 0)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 1), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, 1, speed)); + return true; + } + return false; + case MINUS: + if (GET_CODE (XEXP (x, 1)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 0), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, 1, speed)); + return true; + } + return false; + case COMPARE: + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx + && XEXP (op0, 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + if (GET_CODE (op0) == AND && op1 == const0_rtx + && satisfies_constraint_C1p (XEXP (op0, 1))) + { + /* bmsk.f */ + *total = rtx_cost (XEXP (op0, 0), SET, 1, speed); + return true; + } + /* add.f */ + if (GET_CODE (op1) == NEG) + { + /* op0 might be constant, the inside of op1 is rather + unlikely to be so. So swapping the operands might lower + the cost. */ + *total = (rtx_cost (op0, PLUS, 1, speed) + + rtx_cost (XEXP (op1, 0), PLUS, 0, speed)); + } + return false; + } + case EQ: case NE: + if (outer_code == IF_THEN_ELSE + && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && XEXP (x, 1) == const0_rtx + && XEXP (XEXP (x, 0), 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + rtx op0 = XEXP (x, 0); + + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + /* Fall through. */ + /* scc_insn expands into two insns. */ + case GTU: case GEU: case LEU: + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1); + return false; + case LTU: /* might use adc. */ + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1) - 1; + return false; + default: + return false; + } +} + +/* Return true if ADDR is an address that needs to be expressed as an + explicit sum of pcl + offset. */ + +bool +arc_legitimate_pc_offset_p (rtx addr) +{ + if (GET_CODE (addr) != CONST) + return false; + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + return (GET_CODE (addr) == UNSPEC + && XVECLEN (addr, 0) == 1 + && XINT (addr, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF); +} + +/* Return true if ADDR is a valid pic address. + A valid pic address on arc should look like + const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT)) */ + +bool +arc_legitimate_pic_addr_p (rtx addr) +{ + if (GET_CODE (addr) == LABEL_REF) + return true; + if (GET_CODE (addr) != CONST) + return false; + + addr = XEXP (addr, 0); + + + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) != UNSPEC + || XVECLEN (addr, 0) != 1) + return false; + + /* Must be @GOT or @GOTOFF. 
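The ZERO_EXTRACT cases in arc_rtx_costs above treat single-bit tests as cheap because they can usually map onto btst / bbit0 / bbit1. A source-level sketch of code that tends to produce that comparison shape (whether a bbit is really used also depends on branch range and the surrounding code):

/* Single-bit tests: the shift-and-mask and the masked compare below are
   the kind of patterns the cost function rates as nearly free.  */
int
bit_is_set (unsigned word, unsigned bit)
{
  return (word >> bit) & 1;      /* variable bit number: btst candidate */
}

int
branch_on_bit (unsigned word)
{
  if (word & (1u << 3))          /* fixed bit number: bbit1 candidate */
    return 1;
  return 0;
}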
*/ + if (XINT (addr, 1) != ARC_UNSPEC_GOT + && XINT (addr, 1) != ARC_UNSPEC_GOTOFF) + return false; + + if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF + && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF) + return false; + + return true; +} + + + +/* Return true if OP contains a symbol reference. */ + +static bool +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec. + If SKIP_LOCAL is true, skip symbols that bind locally. + This is used further down in this file, and, without SKIP_LOCAL, + in the addsi3 / subsi3 expanders when generating PIC code. */ + +bool +arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local) +{ + register const char *fmt; + register int i; + + if (GET_CODE(op) == UNSPEC) + return false; + + if (GET_CODE (op) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (op); + return !skip_local || !decl || !default_binds_local_p (decl); + } + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j), + skip_local)) + return true; + } + + else if (fmt[i] == 'e' + && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i), + skip_local)) + return true; + } + + return false; +} + +/* Legitimize a pic address reference in ORIG. + The return value is the legitimated address. + If OLDX is non-zero, it is the target to assign the address to first. */ + +rtx +arc_legitimize_pic_address (rtx orig, rtx oldx) +{ + rtx addr = orig; + rtx pat = orig; + rtx base; + + if (oldx == orig) + oldx = NULL; + + if (GET_CODE (addr) == LABEL_REF) + ; /* Do nothing. */ + else if (GET_CODE (addr) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (addr) + || SYMBOL_REF_LOCAL_P (addr))) + { + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + /* FIXME: if we had a way to emit pc-relative adds that don't + create a GOT entry, we could do without the use of the gp register. */ + crtl->uses_pic_offset_table = 1; + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx == NULL) + oldx = gen_reg_rtx (Pmode); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + + } + else if (GET_CODE (addr) == SYMBOL_REF) + { + /* This symbol must be referenced via a load from the + Global Offset Table (@GOTPC). */ + + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_const_mem (Pmode, pat); + + if (oldx == 0) + oldx = gen_reg_rtx (Pmode); + + emit_move_insn (oldx, pat); + pat = oldx; + } + else + { + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + if (GET_CODE (addr) == UNSPEC) + { + /* Check that the unspec is one of the ones we generate? 
*/ + } + else + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + + /* Check first to see if this is a constant offset from a @GOTOFF + symbol reference. */ + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + /* FIXME: like above, could do without gp reference. */ + crtl->uses_pic_offset_table = 1; + pat + = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + } + else + { + base = arc_legitimize_pic_address (XEXP (addr, 0), oldx); + pat = arc_legitimize_pic_address (XEXP (addr, 1), + base == oldx ? NULL_RTX : oldx); + + if (GET_CODE (pat) == CONST_INT) + pat = plus_constant (Pmode, base, INTVAL (pat)); + else + { + if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0)); + pat = XEXP (pat, 1); + } + pat = gen_rtx_PLUS (Pmode, base, pat); + } + } + } + } + + return pat; +} + +/* Output address constant X to FILE, taking PIC into account. */ + +void +arc_output_pic_addr_const (FILE * file, rtx x, int code) +{ + char buf[256]; + + restart: + switch (GET_CODE (x)) + { + case PC: + if (flag_pic) + putc ('.', file); + else + gcc_unreachable (); + break; + + case SYMBOL_REF: + output_addr_const (file, x); + + /* Local functions do not get references through the PLT. */ + if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) + fputs ("@plt", file); + break; + + case LABEL_REF: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0))); + assemble_name (file, buf); + break; + + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); + assemble_name (file, buf); + break; + + case CONST_INT: + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case CONST: + arc_output_pic_addr_const (file, XEXP (x, 0), code); + break; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode) + { + /* We can use %d if the number is one word and positive. */ + if (CONST_DOUBLE_HIGH (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX, + CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x)); + else if (CONST_DOUBLE_LOW (x) < 0) + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); + } + else + /* We can't handle floating point constants; + PRINT_OPERAND must handle them. */ + output_operand_lossage ("floating constant misused"); + break; + + case PLUS: + /* FIXME: Not needed here. */ + /* Some assemblers need integer constants to appear last (eg masm). */ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 1), code); + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 0), code); + } + else if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 0), code); + if (INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 1), code); + } + else + gcc_unreachable(); + break; + + case MINUS: + /* Avoid outputting things like x-x or x+5-x, + since some assemblers can't handle that. 
*/ + x = simplify_subtraction (x); + if (GET_CODE (x) != MINUS) + goto restart; + + arc_output_pic_addr_const (file, XEXP (x, 0), code); + fprintf (file, "-"); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) < 0) + { + fprintf (file, "("); + arc_output_pic_addr_const (file, XEXP (x, 1), code); + fprintf (file, ")"); + } + else + arc_output_pic_addr_const (file, XEXP (x, 1), code); + break; + + case ZERO_EXTEND: + case SIGN_EXTEND: + arc_output_pic_addr_const (file, XEXP (x, 0), code); + break; + + + case UNSPEC: + gcc_assert (XVECLEN (x, 0) == 1); + if (XINT (x, 1) == ARC_UNSPEC_GOT) + fputs ("pcl,", file); + arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code); + switch (XINT (x, 1)) + { + case ARC_UNSPEC_GOT: + fputs ("@gotpc", file); + break; + case ARC_UNSPEC_GOTOFF: + fputs ("@gotoff", file); + break; + case ARC_UNSPEC_PLT: + fputs ("@plt", file); + break; + default: + output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1)); + break; + } + break; + + default: + output_operand_lossage ("invalid expression as operand"); + } +} + +#define SYMBOLIC_CONST(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +/* Emit insns to move operands[1] into operands[0]. */ + +void +emit_pic_move (rtx *operands, enum machine_mode) +{ + rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + + if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + operands[1] = force_reg (Pmode, operands[1]); + else + operands[1] = arc_legitimize_pic_address (operands[1], temp); +} + + +/* The function returning the number of words, at the beginning of an + argument, must be put in registers. The returned value must be + zero for arguments that are passed entirely in registers or that + are entirely pushed on the stack. + + On some machines, certain arguments must be passed partially in + registers and partially in memory. On these machines, typically + the first N words of arguments are passed in registers, and the + rest on the stack. If a multi-word argument (a `double' or a + structure) crosses that boundary, its first few words must be + passed in registers and the rest must be pushed. This function + tells the compiler when this occurs, and how many of the words + should go in registers. + + `FUNCTION_ARG' for these arguments should return the first register + to be used by the caller for this argument; likewise + `FUNCTION_INCOMING_ARG', for the called function. + + The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS. */ + +/* If REGNO is the least arg reg available then what is the total number of arg + regs available. */ +#define GPR_REST_ARG_REGS(REGNO) \ + ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 ) + +/* Since arc parm regs are contiguous. */ +#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 ) + +/* Implement TARGET_ARG_PARTIAL_BYTES. */ + +static int +arc_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = (mode == BLKmode + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int arg_num = *cum; + int ret; + + arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); + ret = GPR_REST_ARG_REGS (arg_num); + + /* ICEd at function.c:2361, and ret is copied to data->partial */ + ret = (ret >= words ? 
0 : ret * UNITS_PER_WORD); + + return ret; +} + + + +/* This function is used to control a function argument is passed in a + register, and which register. + + The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes + (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE) + all of the previous arguments so far passed in registers; MODE, the + machine mode of the argument; TYPE, the data type of the argument + as a tree node or 0 if that is not known (which happens for C + support library functions); and NAMED, which is 1 for an ordinary + argument and 0 for nameless arguments that correspond to `...' in + the called function's prototype. + + The returned value should either be a `reg' RTX for the hard + register in which to pass the argument, or zero to pass the + argument on the stack. + + For machines like the Vax and 68000, where normally all arguments + are pushed, zero suffices as a definition. + + The usual way to make the ANSI library `stdarg.h' work on a machine + where some arguments are usually passed in registers, is to cause + nameless arguments to be passed on the stack instead. This is done + by making the function return 0 whenever NAMED is 0. + + You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the + definition of this function to determine if this argument is of a + type that must be passed in the stack. If `REG_PARM_STACK_SPACE' + is not defined and the function returns non-zero for such an + argument, the compiler will abort. If `REG_PARM_STACK_SPACE' is + defined, the argument will be computed in the stack and then loaded + into a register. + + The function is used to implement macro FUNCTION_ARG. */ +/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers + and the rest are pushed. */ + +static rtx +arc_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int arg_num = *cum; + rtx ret; + const char *debstr ATTRIBUTE_UNUSED; + + arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); + /* Return a marker for use in the call instruction. */ + if (mode == VOIDmode) + { + ret = const0_rtx; + debstr = "<0>"; + } + else if (GPR_REST_ARG_REGS (arg_num) > 0) + { + ret = gen_rtx_REG (mode, arg_num); + debstr = reg_names [arg_num]; + } + else + { + ret = NULL_RTX; + debstr = "memory"; + } + return ret; +} + +/* The function to update the summarizer variable *CUM to advance past + an argument in the argument list. The values MODE, TYPE and NAMED + describe that argument. Once this is done, the variable *CUM is + suitable for analyzing the *following* argument with + `FUNCTION_ARG', etc. + + This function need not do anything if the argument in question was + passed on the stack. The compiler knows how to track the amount of + stack space used for arguments without any special help. + + The function is used to implement macro FUNCTION_ARG_ADVANCE. */ +/* For the ARC: the cum set here is passed on to function_arg where we + look at its value and say which reg to use. Strategy: advance the + regnumber here till we run out of arg regs, then set *cum to last + reg. In function_arg, since *cum > last arg reg we would return 0 + and thus the arg will end up on the stack. For straddling args of + course function_arg_partial_nregs will come into play. 
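The straddling case mentioned above is easiest to see with concrete numbers. A stand-alone sketch assuming the usual eight ARC argument registers (r0-r7) and 4-byte words, mirroring GPR_REST_ARG_REGS and arc_arg_partial_bytes:

#include <stdio.h>

#define MAX_PARM_REGS 8   /* r0..r7, assumed */
#define WORD_BYTES    4

/* How many bytes of an argument that starts in register FIRST_REG and
   occupies BYTES go into registers when the rest spills to the stack.  */
static int
partial_bytes (int first_reg, int bytes)
{
  int words = (bytes + WORD_BYTES - 1) / WORD_BYTES;
  int regs_left
    = first_reg <= MAX_PARM_REGS ? MAX_PARM_REGS - first_reg : 0;
  return regs_left >= words ? 0 : regs_left * WORD_BYTES;
}

int
main (void)
{
  /* 8-byte argument starting in r7: 4 bytes in r7, 4 on the stack.  */
  printf ("%d\n", partial_bytes (7, 8));   /* 4 */
  /* Same argument starting in r6: fits entirely in r6/r7.  */
  printf ("%d\n", partial_bytes (6, 8));   /* 0 */
  /* Starting past the last register: entirely on the stack.  */
  printf ("%d\n", partial_bytes (8, 8));   /* 0 */
  return 0;
}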
*/ + +static void +arc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = (mode == BLKmode + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int i; + + if (words) + *cum = ROUND_ADVANCE_CUM (*cum, mode, type); + for (i = 0; i < words; i++) + *cum = ARC_NEXT_ARG_REG (*cum); + +} + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FN_DECL_OR_TYPE is its + FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */ + +static rtx +arc_function_value (const_tree valtype, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = TYPE_MODE (valtype); + int unsignedp ATTRIBUTE_UNUSED; + + unsignedp = TYPE_UNSIGNED (valtype); + if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE) + PROMOTE_MODE (mode, unsignedp, valtype); + return gen_rtx_REG (mode, 0); +} + +/* Returns the return address that is used by builtin_return_address. */ + +rtx +arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM); +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +bool +arc_legitimate_pic_operand_p (rtx x) +{ + return !arc_raw_symbolic_reference_mentioned_p (x, true); +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +arc_legitimate_constant_p (enum machine_mode, rtx x) +{ + if (!flag_pic) + return true; + + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + x = XEXP (x, 0); + } + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case ARC_UNSPEC_PLT: + case ARC_UNSPEC_GOTOFF: + case ARC_UNSPEC_GOT: + case UNSPEC_PROF: + return true; + + default: + gcc_unreachable (); + } + + /* We must have drilled down to a symbol. */ + if (arc_raw_symbolic_reference_mentioned_p (x, false)) + return false; + + /* Return true. */ + break; + + case LABEL_REF: + case SYMBOL_REF: + return false; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +static bool +arc_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + if (RTX_OK_FOR_BASE_P (x, strict)) + return true; + if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict)) + return true; + if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict)) + return true; + if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x)) + return true; + if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x))) + return true; + if ((GET_MODE_SIZE (mode) != 16) + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST)) + { + if (!flag_pic || arc_legitimate_pic_addr_p (x)) + return true; + } + if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict)) + return true; + /* We're restricted here by the `st' insn. 
*/ + if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY) + && GET_CODE (XEXP ((x), 1)) == PLUS + && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0)) + && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1), + TARGET_AUTO_MODIFY_REG, strict)) + return true; + return false; +} + +/* Return true iff ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. */ + +static bool +arc_mode_dependent_address_p (const_rtx addr, addr_space_t) +{ + /* SYMBOL_REF is not mode dependent: it is either a small data reference, + which is valid for loads and stores, or a limm offset, which is valid for + loads. */ + /* Scaled indices are scaled by the access mode; likewise for scaled + offsets, which are needed for maximum offset stores. */ + if (GET_CODE (addr) == PLUS + && (GET_CODE (XEXP ((addr), 0)) == MULT + || (CONST_INT_P (XEXP ((addr), 1)) + && !SMALL_INT (INTVAL (XEXP ((addr), 1)))))) + return true; + return false; +} + +/* Determine if it's legal to put X into the constant pool. */ + +static bool +arc_cannot_force_const_mem (enum machine_mode mode, rtx x) +{ + return !arc_legitimate_constant_p (mode, x); +} + + +/* Generic function to define a builtin. */ +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if (MASK) \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \ + } \ + while (0) + + +static void +arc_init_builtins (void) +{ + tree endlink = void_list_node; + + tree void_ftype_void + = build_function_type (void_type_node, + endlink); + + tree int_ftype_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST)); + tree int_ftype_pcvoid_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, pcvoid_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree int_ftype_short_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, short_integer_type_node, endlink)); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + tree void_ftype_usint_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink))); + + tree int_ftype_int_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + tree usint_ftype_usint + = build_function_type (long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + tree void_ftype_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + /* Add the builtins. 
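The list that follows registers the ARC-specific builtins; from user code they look like ordinary calls. A minimal usage sketch — it only compiles with an ARC-targeted gcc, and which builtins exist depends on the core and option flags exactly as the def_mbuiltin conditions below state:

/* Plain host compilers do not know these builtins; flag requirements
   follow the registration table below.  */
int
builtin_demo (int x)
{
  __builtin_arc_nop ();                  /* always registered             */
  int n = __builtin_arc_norm (x);        /* requires -mnorm (TARGET_NORM) */
  int s = __builtin_arc_swap (x);        /* requires -mswap (TARGET_SWAP) */
  return n + s;
}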
*/ + def_mbuiltin (1,"__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP); + def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM); + def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW); + def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64); + def_mbuiltin (1,"__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC); + def_mbuiltin ((TARGET_EA_SET),"__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW); + def_mbuiltin (1,"__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK); + def_mbuiltin (1,"__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG); + def_mbuiltin (1,"__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP); + def_mbuiltin (1,"__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI); + def_mbuiltin (1,"__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ); + def_mbuiltin (1,"__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE); + def_mbuiltin (1,"__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR); + def_mbuiltin (1,"__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S); + def_mbuiltin (1,"__builtin_arc_aligned", int_ftype_pcvoid_int, ARC_BUILTIN_ALIGNED); + + if (TARGET_SIMD_SET) + arc_init_simd_builtins (); +} + +static rtx arc_expand_simd_builtin (tree, rtx, rtx, enum machine_mode, int); + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +arc_expand_builtin (tree exp, + rtx target, + rtx subtarget, + enum machine_mode mode, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + rtx op0; + rtx op1; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + + if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END) + return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore); + + switch (fcode) + { + case ARC_BUILTIN_NOP: + emit_insn (gen_nop ()); + return NULL_RTX; + + case ARC_BUILTIN_NORM: + icode = CODE_FOR_clrsbsi2; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_clrsbsi2 (target, op0)); + return target; + + case ARC_BUILTIN_NORMW: + + /* FIXME : This should all be HImode, not SImode. */ + icode = CODE_FOR_normw; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0,0)); + + emit_insn (gen_normw (target, op0)); + return target; + + case ARC_BUILTIN_MUL64: + icode = CODE_FOR_mul64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mul64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_MULU64: + icode = CODE_FOR_mulu64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[0].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mulu64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_RTIE: + icode = CODE_FOR_rtie; + emit_insn (gen_rtie (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SYNC: + icode = CODE_FOR_sync; + emit_insn (gen_sync (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SWAP: + icode = CODE_FOR_swap; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_swap (target, op0)); + return target; + + case ARC_BUILTIN_DIVAW: + icode = CODE_FOR_divaw; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + target = gen_reg_rtx (SImode); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_divaw (target, op0, op1)); + return target; + + case ARC_BUILTIN_BRK: + icode = CODE_FOR_brk; + emit_insn (gen_brk (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SLEEP: + icode = CODE_FOR_sleep; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_sleep (op0)); + return NULL_RTX; + + case ARC_BUILTIN_SWI: + icode = CODE_FOR_swi; + emit_insn (gen_swi (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_FLAG: + icode = CODE_FOR_flag; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_flag (op0)); + return NULL_RTX; + + case ARC_BUILTIN_CORE_READ: + icode = CODE_FOR_core_read; + arg0 = CALL_EXPR_ARG (exp, 0); + target = gen_reg_rtx (SImode); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_core_read (target, op0)); + return target; + + case ARC_BUILTIN_CORE_WRITE: + icode = CODE_FOR_core_write; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + fold (arg1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + emit_insn (gen_core_write (op0, op1)); + return NULL_RTX; + + case ARC_BUILTIN_LR: + icode = CODE_FOR_lr; + arg0 = CALL_EXPR_ARG (exp, 0); + target = gen_reg_rtx (SImode); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_lr (target, op0)); + return target; + + case ARC_BUILTIN_SR: + icode = CODE_FOR_sr; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + fold (arg1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + emit_insn (gen_sr (op0, op1)); + return NULL_RTX; + + case ARC_BUILTIN_TRAP_S: + icode = CODE_FOR_trap_s; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + /* We don't give an error for non-cost values here because + we still want to allow things to be fixed up by later inlining / + constant folding / dead code elimination. */ + if (CONST_INT_P (op0) && !satisfies_constraint_L (op0)) + { + /* Keep this message in sync with the one in arc.md:trap_s, + because *.md files don't get scanned by exgettext. */ + error ("operand to trap_s should be an unsigned 6-bit value"); + } + emit_insn (gen_trap_s (op0)); + return NULL_RTX; + + case ARC_BUILTIN_UNIMP_S: + icode = CODE_FOR_unimp_s; + emit_insn (gen_unimp_s (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_ALIGNED: + /* __builtin_arc_aligned (void* val, int alignval) */ + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + fold (arg1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + target = gen_reg_rtx (SImode); + + if (!CONST_INT_P (op1)) + { + /* If we can't fold the alignment to a constant integer + whilst optimizing, this is probably a user error. */ + if (optimize) + warning (0, "__builtin_arc_aligned with non-constant alignment"); + } + else + { + HOST_WIDE_INT alignTest = INTVAL (op1); + /* Check alignTest is positive, and a power of two. 
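The check immediately below relies on the x & -x idiom: a value is a power of two exactly when it is positive and equal to its own lowest set bit. A stand-alone version with a few sanity checks:

#include <assert.h>

/* alignTest is a power of two exactly when it is positive and equals
   (alignTest & -alignTest); equivalently, (x & (x - 1)) == 0 for x > 0.  */
static int
power_of_two_p (long x)
{
  return x > 0 && x == (x & -x);
}

int
main (void)
{
  assert (power_of_two_p (1));
  assert (power_of_two_p (4096));
  assert (!power_of_two_p (0));
  assert (!power_of_two_p (12));
  assert (!power_of_two_p (-8));
  return 0;
}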
*/ + if (alignTest <= 0 || alignTest != (alignTest & -alignTest)) + { + error ("invalid alignment value for __builtin_arc_aligned"); + return NULL_RTX; + } + + if (CONST_INT_P (op0)) + { + HOST_WIDE_INT pnt = INTVAL (op0); + + if ((pnt & (alignTest - 1)) == 0) + return const1_rtx; + } + else + { + unsigned align = get_pointer_alignment (arg0); + unsigned numBits = alignTest * BITS_PER_UNIT; + + if (align && align >= numBits) + return const1_rtx; + /* Another attempt to ascertain alignment. Check the type + we are pointing to. */ + if (POINTER_TYPE_P (TREE_TYPE (arg0)) + && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits) + return const1_rtx; + } + } + + /* Default to false. */ + return const0_rtx; + + default: + break; + } + + /* @@@ Should really do something sensible here. */ + return NULL_RTX; +} + +/* Returns true if the operands[opno] is a valid compile-time constant to be + used as register number in the code for builtins. Else it flags an error + and returns false. */ + +bool +check_if_valid_regno_const (rtx *operands, int opno) +{ + + switch (GET_CODE (operands[opno])) + { + case SYMBOL_REF : + case CONST : + case CONST_INT : + return true; + default: + error ("register number must be a compile-time constant. Try giving higher optimization levels"); + break; + } + return false; +} + +/* Check that after all the constant folding, whether the operand to + __builtin_arc_sleep is an unsigned int of 6 bits. If not, flag an error. */ + +bool +check_if_valid_sleep_operand (rtx *operands, int opno) +{ + switch (GET_CODE (operands[opno])) + { + case CONST : + case CONST_INT : + if( UNSIGNED_INT6 (INTVAL (operands[opno]))) + return true; + default: + fatal_error("operand for sleep instruction must be an unsigned 6 bit compile-time constant"); + break; + } + return false; +} + +/* Return true if it is ok to make a tail-call to DECL. */ + +static bool +arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) +{ + /* Never tailcall from an ISR routine - it needs a special exit sequence. */ + if (ARC_INTERRUPT_P (arc_compute_function_type (cfun))) + return false; + + /* Everything else is ok. */ + return true; +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ + +static void +arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + int mi_delta = delta; + const char *const mi_op = mi_delta < 0 ? "sub" : "add"; + int shift = 0; + int this_regno + = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0; + rtx fnaddr; + + if (mi_delta < 0) + mi_delta = - mi_delta; + + /* Add DELTA. When possible use a plain add, otherwise load it into + a register first. */ + + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%s, %s, %d\n", + mi_op, reg_names[this_regno], reg_names[this_regno], + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } + + /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. 
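The add/sub loop in arc_output_mi_thunk above splits an arbitrary DELTA into 8-bit chunks positioned at even shifts, skipping zero 2-bit groups. A stand-alone trace of that decomposition, assuming a positive delta (the sign only chooses add versus sub):

#include <stdio.h>

/* Mirror of the thunk's delta loop: skip zero 2-bit groups, otherwise
   peel off an 8-bit chunk starting at the current shift position.  */
static void
decompose_delta (unsigned long delta)
{
  int shift = 0;
  while (delta != 0)
    {
      if ((delta & (3UL << shift)) == 0)
        shift += 2;
      else
        {
          unsigned long chunk = delta & (0xffUL << shift);
          printf ("add this, this, %#lx\n", chunk);
          delta &= ~(0xffUL << shift);
          shift += 8;
        }
    }
}

int
main (void)
{
  decompose_delta (0x1234);   /* -> add 0x234 ; add 0x1000 */
  return 0;
}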
*/ + if (vcall_offset != 0) + { + /* ld r12,[this] --> temp = *this + add r12,r12,vcall_offset --> temp = *(*this + vcall_offset) + ld r12,[r12] + add this,this,r12 --> this+ = *(*this + vcall_offset) */ + asm_fprintf (file, "\tld\t%s, [%s]\n", + ARC_TEMP_SCRATCH_REG, reg_names[this_regno]); + asm_fprintf (file, "\tadd\t%s, %s, %ld\n", + ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset); + asm_fprintf (file, "\tld\t%s, [%s]\n", + ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG); + asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno], + reg_names[this_regno], ARC_TEMP_SCRATCH_REG); + } + + fnaddr = XEXP (DECL_RTL (function), 0); + + if (arc_is_longcall_p (fnaddr)) + fputs ("\tj\t", file); + else + fputs ("\tb\t", file); + assemble_name (file, XSTR (fnaddr, 0)); + fputc ('\n', file); +} + +/* Return true if a 32 bit "long_call" should be generated for + this calling SYM_REF. We generate a long_call if the function: + + a. has an __attribute__((long call)) + or b. the -mlong-calls command line switch has been specified + + However we do not generate a long call if the function has an + __attribute__ ((short_call)) or __attribute__ ((medium_call)) + + This function will be called by C fragments contained in the machine + description file. */ + +bool +arc_is_longcall_p (rtx sym_ref) +{ + if (GET_CODE (sym_ref) != SYMBOL_REF) + return false; + + return (SYMBOL_REF_LONG_CALL_P (sym_ref) + || (TARGET_LONG_CALLS_SET + && !SYMBOL_REF_SHORT_CALL_P (sym_ref) + && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); + +} + +/* Likewise for short calls. */ + +bool +arc_is_shortcall_p (rtx sym_ref) +{ + if (GET_CODE (sym_ref) != SYMBOL_REF) + return false; + + return (SYMBOL_REF_SHORT_CALL_P (sym_ref) + || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS + && !SYMBOL_REF_LONG_CALL_P (sym_ref) + && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); + +} + +/* Emit profiling code for calling CALLEE. Return true if a special + call pattern needs to be generated. */ + +bool +arc_profile_call (rtx callee) +{ + rtx from = XEXP (DECL_RTL (current_function_decl), 0); + + if (TARGET_UCB_MCOUNT) + /* Profiling is done by instrumenting the callee. */ + return false; + + if (CONSTANT_P (callee)) + { + rtx count_ptr + = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, from, callee, + CONST0_RTX (Pmode)), + UNSPEC_PROF)); + rtx counter = gen_rtx_MEM (SImode, count_ptr); + /* ??? The increment would better be done atomically, but as there is + no proper hardware support, that would be too expensive. */ + emit_move_insn (counter, + force_reg (SImode, plus_constant (SImode, counter, 1))); + return false; + } + else + { + rtx count_list_ptr + = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, from, CONST0_RTX (Pmode), + CONST0_RTX (Pmode)), + UNSPEC_PROF)); + emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr); + emit_move_insn (gen_rtx_REG (Pmode, 9), callee); + return true; + } +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type)) + return true; + else + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } +} + + +/* This was in rtlanal.c, and can go in there when we decide we want + to submit the change for inclusion in the GCC tree. 
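The long/short call classification above is driven by per-function attributes and by -mlong-calls / -mmedium-calls. A declaration-level sketch using the attribute spellings looked up in arc_encode_section_info (the callees are placeholders with no definitions here):

/* Per-function overrides of the call model; the command-line default
   (-mlong-calls, -mmedium-calls or neither) applies to everything else.  */
void far_away (void)   __attribute__ ((long_call));   /* force 32-bit call    */
void nearby (void)     __attribute__ ((short_call));  /* force short-range call */
void in_between (void) __attribute__ ((medium_call));

void
caller (void)
{
  far_away ();
  nearby ();
  in_between ();
}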
*/ +/* Like note_stores, but allow the callback to have side effects on the rtl + (like the note_stores of yore): + Call FUN on each register or MEM that is stored into or clobbered by X. + (X would be the pattern of an insn). DATA is an arbitrary pointer, + ignored by note_stores, but passed to FUN. + FUN may alter parts of the RTL. + + FUN receives three arguments: + 1. the REG, MEM, CC0 or PC being stored in or clobbered, + 2. the SET or CLOBBER rtx that does the store, + 3. the pointer DATA provided to note_stores. + + If the item being stored in or clobbered is a SUBREG of a hard register, + the SUBREG will be passed. */ + +/* For now. */ static +void +walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data) +{ + int i; + + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + + if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) + { + rtx dest = SET_DEST (x); + + while ((GET_CODE (dest) == SUBREG + && (!REG_P (SUBREG_REG (dest)) + || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER)) + || GET_CODE (dest) == ZERO_EXTRACT + || GET_CODE (dest) == STRICT_LOW_PART) + dest = XEXP (dest, 0); + + /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions, + each of whose first operand is a register. */ + if (GET_CODE (dest) == PARALLEL) + { + for (i = XVECLEN (dest, 0) - 1; i >= 0; i--) + if (XEXP (XVECEXP (dest, 0, i), 0) != 0) + (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data); + } + else + (*fun) (dest, x, data); + } + + else if (GET_CODE (x) == PARALLEL) + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + walk_stores (XVECEXP (x, 0, i), fun, data); +} + +static bool +arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + return (type != 0 + && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || TREE_ADDRESSABLE (type))); +} + +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +arc_can_use_doloop_p (double_int iterations, double_int, + unsigned int loop_depth, bool entered_at_top) +{ + if (loop_depth > 1) + return false; + /* Setting up the loop with two sr instructions costs 6 cycles. */ + if (TARGET_ARC700 + && !entered_at_top + && iterations.high == 0 + && iterations.low > 0 + && iterations.low <= (flag_pic ? 6 : 3)) + return false; + return true; +} + +/* NULL if INSN insn is valid within a low-overhead loop. + Otherwise return why doloop cannot be applied. */ + +static const char * +arc_invalid_within_doloop (const_rtx insn) +{ + if (CALL_P (insn)) + return "Function call in the loop."; + return NULL; +} + +static int arc_reorg_in_progress = 0; + +/* ARC's machince specific reorg function. */ + +static void +arc_reorg (void) +{ + rtx insn, pattern; + rtx pc_target; + long offset; + int changed; + + cfun->machine->arc_reorg_started = 1; + arc_reorg_in_progress = 1; + + /* Emit special sections for profiling. */ + if (crtl->profile) + { + section *save_text_section; + rtx insn; + int size = get_max_uid () >> 4; + htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq, + NULL); + + save_text_section = in_section; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (NONJUMP_INSN_P (insn)) + walk_stores (PATTERN (insn), write_profile_sections, htab); + if (htab_elements (htab)) + in_section = 0; + switch_to_section (save_text_section); + htab_delete (htab); + } + + /* Link up loop ends with their loop start. 
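arc_can_use_doloop_p and arc_invalid_within_doloop above decide which counted loops may become zero-overhead lp/LP_COUNT loops, which the pass below then links up via doloop_begin_i / doloop_end_i. A source-level sketch of a loop that qualifies and one that does not (consume is just a placeholder external call):

extern int consume (int);   /* any call in the body disqualifies the loop */

/* Innermost, call-free, counted loop: a candidate for the zero-overhead
   form set up by doloop_begin_i / doloop_end_i.  */
int
sum_array (const int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += a[i];
  return s;
}

/* The call makes arc_invalid_within_doloop reject this one, so it keeps
   an ordinary decrement-and-branch loop.  */
int
sum_mapped (const int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += consume (a[i]);
  return s;
}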
*/ + { + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == JUMP_INSN + && recog_memoized (insn) == CODE_FOR_doloop_end_i) + { + rtx top_label + = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0); + rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label)); + rtx lp, prev = prev_nonnote_insn (top_label); + rtx lp_simple = NULL_RTX; + rtx next = NULL_RTX; + rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0); + HOST_WIDE_INT loop_end_id + = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)); + int seen_label = 0; + + for (lp = prev; + (lp && NONJUMP_INSN_P (lp) + && recog_memoized (lp) != CODE_FOR_doloop_begin_i); + lp = prev_nonnote_insn (lp)) + ; + if (!lp || !NONJUMP_INSN_P (lp) + || dead_or_set_regno_p (lp, LP_COUNT)) + { + for (prev = next = insn, lp = NULL_RTX ; prev || next;) + { + if (prev) + { + if (NONJUMP_INSN_P (prev) + && recog_memoized (prev) == CODE_FOR_doloop_begin_i + && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0)) + == loop_end_id)) + { + lp = prev; + break; + } + else if (LABEL_P (prev)) + seen_label = 1; + prev = prev_nonnote_insn (prev); + } + if (next) + { + if (NONJUMP_INSN_P (next) + && recog_memoized (next) == CODE_FOR_doloop_begin_i + && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0)) + == loop_end_id)) + { + lp = next; + break; + } + next = next_nonnote_insn (next); + } + } + prev = NULL_RTX; + } + else + lp_simple = lp; + if (lp && !dead_or_set_regno_p (lp, LP_COUNT)) + { + rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0); + if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0))) + /* The loop end insn has been duplicated. That can happen + when there is a conditional block at the very end of + the loop. */ + goto failure; + /* If Register allocation failed to allocate to the right + register, There is no point into teaching reload to + fix this up with reloads, as that would cost more + than using an ordinary core register with the + doloop_fallback pattern. */ + if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt)) + /* Likewise, if the loop setup is evidently inside the loop, + we loose. */ + || (!lp_simple && lp != next && !seen_label)) + { + remove_insn (lp); + goto failure; + } + /* It is common that the optimizers copy the loop count from + another register, and doloop_begin_i is stuck with the + source of the move. Making doloop_begin_i only accept "l" + is nonsentical, as this then makes reload evict the pseudo + used for the loop end. The underlying cause is that the + optimizers don't understand that the register allocation for + doloop_begin_i should be treated as part of the loop. + Try to work around this problem by verifying the previous + move exists. 
*/ + if (true_regnum (begin_cnt) != LP_COUNT) + { + rtx mov, set, note; + + for (mov = prev_nonnote_insn (lp); mov; + mov = prev_nonnote_insn (mov)) + { + if (!NONJUMP_INSN_P (mov)) + mov = 0; + else if ((set = single_set (mov)) + && rtx_equal_p (SET_SRC (set), begin_cnt) + && rtx_equal_p (SET_DEST (set), op0)) + break; + } + if (mov) + { + XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0; + note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt)); + if (note) + remove_note (lp, note); + } + else + { + remove_insn (lp); + goto failure; + } + } + XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num; + XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num; + if (next == lp) + XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx; + else if (!lp_simple) + XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx; + else if (prev != lp) + { + remove_insn (lp); + add_insn_after (lp, prev, NULL); + } + if (!lp_simple) + { + XEXP (XVECEXP (PATTERN (lp), 0, 7), 0) + = gen_rtx_LABEL_REF (Pmode, top_label); + add_reg_note (lp, REG_LABEL_OPERAND, top_label); + LABEL_NUSES (top_label)++; + } + /* We can avoid tedious loop start / end setting for empty loops + be merely setting the loop count to its final value. */ + if (next_active_insn (top_label) == insn) + { + rtx lc_set + = gen_rtx_SET (VOIDmode, + XEXP (XVECEXP (PATTERN (lp), 0, 3), 0), + const0_rtx); + + lc_set = emit_insn_before (lc_set, insn); + delete_insn (lp); + delete_insn (insn); + insn = lc_set; + } + /* If the loop is non-empty with zero length, we can't make it + a zero-overhead loop. That can happen for empty asms. */ + else + { + rtx scan; + + for (scan = top_label; + (scan && scan != insn + && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan))); + scan = NEXT_INSN (scan)); + if (scan == insn) + { + remove_insn (lp); + goto failure; + } + } + } + else + { + /* Sometimes the loop optimizer makes a complete hash of the + loop. If it were only that the loop is not entered at the + top, we could fix this up by setting LP_START with SR . + However, if we can't find the loop begin were it should be, + chances are that it does not even dominate the loop, but is + inside the loop instead. Using SR there would kill + performance. + We use the doloop_fallback pattern here, which executes + in two cycles on the ARC700 when predicted correctly. */ + failure: + if (!REG_P (op0)) + { + rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0); + + emit_insn_before (gen_move_insn (op3, op0), insn); + PATTERN (insn) + = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0); + } + else + XVEC (PATTERN (insn), 0) + = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0), + XVECEXP (PATTERN (insn), 0, 1)); + INSN_CODE (insn) = -1; + } + } + } + +/* FIXME: should anticipate ccfsm action, generate special patterns for + to-be-deleted branches that have no delay slot and have at least the + length of the size increase forced on other insns that are conditionalized. + This can also have an insn_list inside that enumerates insns which are + not actually conditionalized because the destinations are dead in the + not-execute case. + Could also tag branches that we want to be unaligned if they get no delay + slot, or even ones that we don't want to do delay slot sheduling for + because we can unalign them. + + However, there are cases when conditional execution is only possible after + delay slot scheduling: + + - If a delay slot is filled with a nocond/set insn from above, the previous + basic block can become elegible for conditional execution. 
+ - If a delay slot is filled with a nocond insn from the fall-through path, + the branch with that delay slot can become eligble for conditional + execution (however, with the same sort of data flow analysis that dbr + does, we could have figured out before that we don't need to + conditionalize this insn.) + - If a delay slot insn is filled with an insn from the target, the + target label gets its uses decremented (even deleted if falling to zero), + thus possibly creating more condexec opportunities there. + Therefore, we should still be prepared to apply condexec optimization on + non-prepared branches if the size increase of conditionalized insns is no + more than the size saved from eliminating the branch. An invocation option + could also be used to reserve a bit of extra size for condbranches so that + this'll work more often (could also test in arc_reorg if the block is + 'close enough' to be eligible for condexec to make this likely, and + estimate required size increase). */ + /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible. */ + if (TARGET_NO_BRCC_SET) + return; + + do + { + init_insn_lengths(); + changed = 0; + + if (optimize > 1 && !TARGET_NO_COND_EXEC) + { + arc_ifcvt (); + unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish; + df_finish_pass ((flags & TODO_df_verify) != 0); + } + + /* Call shorten_branches to calculate the insn lengths. */ + shorten_branches (get_insns()); + cfun->machine->ccfsm_current_insn = NULL_RTX; + + if (!INSN_ADDRESSES_SET_P()) + fatal_error ("Insn addresses not set after shorten_branches"); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx label; + enum attr_type insn_type; + + /* If a non-jump insn (or a casesi jump table), continue. */ + if (GET_CODE (insn) != JUMP_INSN || + GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + continue; + + /* If we already have a brcc, note if it is suitable for brcc_s. + Be a bit generous with the brcc_s range so that we can take + advantage of any code shortening from delay slot scheduling. */ + if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch) + { + rtx pat = PATTERN (insn); + rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0); + rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0); + + offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + if ((offset >= -140 && offset < 140) + && rtx_equal_p (XEXP (op, 1), const0_rtx) + && compact_register_operand (XEXP (op, 0), VOIDmode) + && equality_comparison_operator (op, VOIDmode)) + PUT_MODE (*ccp, CC_Zmode); + else if (GET_MODE (*ccp) == CC_Zmode) + PUT_MODE (*ccp, CC_ZNmode); + continue; + } + if ((insn_type = get_attr_type (insn)) == TYPE_BRCC + || insn_type == TYPE_BRCC_NO_DELAY_SLOT) + continue; + + /* OK. so we have a jump insn. */ + /* We need to check that it is a bcc. */ + /* Bcc => set (pc) (if_then_else ) */ + pattern = PATTERN (insn); + if (GET_CODE (pattern) != SET + || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE + || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1))) + continue; + + /* Now check if the jump is beyond the s9 range. */ + if (find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)) + continue; + offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + + if(offset > 253 || offset < -254) + continue; + + pc_target = SET_SRC (pattern); + + /* Now go back and search for the set cc insn. 
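The displacement windows tested above can be summarised in one helper; this is purely illustrative (the name and return convention are mine). As the code shows, the short brcc_s form additionally requires an equality compare of a compact register against zero, which this sketch does not model.

    /* Classify a branch displacement the way the BRcc-generation loop
       does: beyond the s9 range no BRcc is emitted at all, inside a
       generous +/-140 window the 16-bit brcc_s encoding is considered,
       otherwise only the 32-bit BRcc form is a candidate.  */
    static int
    brcc_range_class (long offset)
    {
      if (offset > 253 || offset < -254)
        return 0;                      /* out of range for BRcc        */
      if (offset >= -140 && offset < 140)
        return 2;                      /* brcc_s may also be possible  */
      return 1;                        /* 32-bit BRcc only             */
    }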
*/ + + label = XEXP (pc_target, 1); + + { + rtx pat, scan, link_insn = NULL; + + for (scan = PREV_INSN (insn); + scan && GET_CODE (scan) != CODE_LABEL; + scan = PREV_INSN (scan)) + { + if (! INSN_P (scan)) + continue; + pat = PATTERN (scan); + if (GET_CODE (pat) == SET + && cc_register (SET_DEST (pat), VOIDmode)) + { + link_insn = scan; + break; + } + } + if (! link_insn) + continue; + else + /* Check if this is a data dependency. */ + { + rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; + rtx cmp0, cmp1; + + /* Ok this is the set cc. copy args here. */ + op = XEXP (pc_target, 0); + + op0 = cmp0 = XEXP (SET_SRC (pat), 0); + op1 = cmp1 = XEXP (SET_SRC (pat), 1); + if (GET_CODE (op0) == ZERO_EXTRACT + && XEXP (op0, 1) == const1_rtx + && (GET_CODE (op) == EQ + || GET_CODE (op) == NE)) + { + /* btst / b{eq,ne} -> bbit{0,1} */ + op0 = XEXP (cmp0, 0); + op1 = XEXP (cmp0, 2); + } + else if (!register_operand (op0, VOIDmode) + || !general_operand (op1, VOIDmode)) + continue; + /* Be careful not to break what cmpsfpx_raw is + trying to create for checking equality of + single-precision floats. */ + else if (TARGET_SPFP + && GET_MODE (op0) == SFmode + && GET_MODE (op1) == SFmode) + continue; + + /* None of the two cmp operands should be set between the + cmp and the branch. */ + if (reg_set_between_p (op0, link_insn, insn)) + continue; + + if (reg_set_between_p (op1, link_insn, insn)) + continue; + + /* Since the MODE check does not work, check that this is + CC reg's last set location before insn, and also no + instruction between the cmp and branch uses the + condition codes. */ + if ((reg_set_between_p (SET_DEST (pat), link_insn, insn)) + || (reg_used_between_p (SET_DEST (pat), link_insn, insn))) + continue; + + /* CC reg should be dead after insn. */ + if (!find_regno_note (insn, REG_DEAD, CC_REG)) + continue; + + op = gen_rtx_fmt_ee (GET_CODE (op), + GET_MODE (op), cmp0, cmp1); + /* If we create a LIMM where there was none before, + we only benefit if we can avoid a scheduling bubble + for the ARC600. Otherwise, we'd only forgo chances + at short insn generation, and risk out-of-range + branches. */ + if (!brcc_nolimm_operator (op, VOIDmode) + && !long_immediate_operand (op1, VOIDmode) + && (TARGET_ARC700 + || next_active_insn (link_insn) != insn)) + continue; + + /* Emit bbit / brcc (or brcc_s if possible). + CC_Zmode indicates that brcc_s is possible. 
*/ + + if (op0 != cmp0) + cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG); + else if ((offset >= -140 && offset < 140) + && rtx_equal_p (op1, const0_rtx) + && compact_register_operand (op0, VOIDmode) + && (GET_CODE (op) == EQ + || GET_CODE (op) == NE)) + cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG); + else + cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG); + + brcc_insn + = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx); + brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn); + cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx); + brcc_insn + = gen_rtx_PARALLEL + (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx)); + brcc_insn = emit_jump_insn_before (brcc_insn, insn); + + JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn); + note = find_reg_note (insn, REG_BR_PROB, 0); + if (note) + { + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + note = find_reg_note (link_insn, REG_DEAD, op0); + if (note) + { + remove_note (link_insn, note); + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + note = find_reg_note (link_insn, REG_DEAD, op1); + if (note) + { + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + + changed = 1; + + /* Delete the bcc insn. */ + set_insn_deleted (insn); + + /* Delete the cmp insn. */ + set_insn_deleted (link_insn); + + } + } + } + /* Clear out insn_addresses. */ + INSN_ADDRESSES_FREE (); + + } while (changed); + + if (INSN_ADDRESSES_SET_P()) + fatal_error ("insn addresses not freed"); + + arc_reorg_in_progress = 0; +} + + /* Check if the operands are valid for BRcc.d generation + Valid Brcc.d patterns are + Brcc.d b, c, s9 + Brcc.d b, u6, s9 + + For cc={GT, LE, GTU, LEU}, u6=63 can not be allowed, + since they are encoded by the assembler as {GE, LT, HS, LS} 64, which + does not have a delay slot + + Assumed precondition: Second operand is either a register or a u6 value. */ + +bool +valid_brcc_with_delay_p (rtx *operands) +{ + if (optimize_size && GET_MODE (operands[4]) == CC_Zmode) + return false; + return brcc_nolimm_operator (operands[0], VOIDmode); +} + +/* ??? Hack. This should no really be here. See PR32143. */ +static bool +arc_decl_anon_ns_mem_p (const_tree decl) +{ + while (1) + { + if (decl == NULL_TREE || decl == error_mark_node) + return false; + if (TREE_CODE (decl) == NAMESPACE_DECL + && DECL_NAME (decl) == NULL_TREE) + return true; + /* Classes and namespaces inside anonymous namespaces have + TREE_PUBLIC == 0, so we can shortcut the search. */ + else if (TYPE_P (decl)) + return (TREE_PUBLIC (TYPE_NAME (decl)) == 0); + else if (TREE_CODE (decl) == NAMESPACE_DECL) + return (TREE_PUBLIC (decl) == 0); + else + decl = DECL_CONTEXT (decl); + } +} + +/* Implement TARGET_IN_SMALL_DATA_P. Return true if it would be safe to + access DECL using %gp_rel(...)($gp). */ + +static bool +arc_in_small_data_p (const_tree decl) +{ + HOST_WIDE_INT size; + + if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) + return false; + + + /* We don't yet generate small-data references for -mabicalls. See related + -G handling in override_options. */ + if (TARGET_NO_SDATA_SET) + return false; + + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) + { + const char *name; + + /* Reject anything that isn't in a known small-data section. 
*/ + name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl)); + if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0) + return false; + + /* If a symbol is defined externally, the assembler will use the + usual -G rules when deciding how to implement macros. */ + if (!DECL_EXTERNAL (decl)) + return true; + } + /* Only global variables go into sdata section for now. */ + else if (1) + { + /* Don't put constants into the small data section: we want them + to be in ROM rather than RAM. */ + if (TREE_CODE (decl) != VAR_DECL) + return false; + + if (TREE_READONLY (decl) + && !TREE_SIDE_EFFECTS (decl) + && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl)))) + return false; + + /* TREE_PUBLIC might change after the first call, because of the patch + for PR19238. */ + if (default_binds_local_p_1 (decl, 1) + || arc_decl_anon_ns_mem_p (decl)) + return false; + + /* To ensure -mvolatile-cache works + ld.di does not have a gp-relative variant. */ + if (TREE_THIS_VOLATILE (decl)) + return false; + } + + /* Disable sdata references to weak variables. */ + if (DECL_WEAK (decl)) + return false; + + size = int_size_in_bytes (TREE_TYPE (decl)); + +/* if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */ +/* return false; */ + + /* Allow only <=4B long data types into sdata. */ + return (size > 0 && size <= 4); +} + +/* Return true if X is a small data address that can be rewritten + as a gp+symref. */ + +static bool +arc_rewrite_small_data_p (rtx x) +{ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + x = XEXP (x, 0); + } + + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_SMALL_P(x)); +} + +/* A for_each_rtx callback, used by arc_rewrite_small_data. */ + +static int +arc_rewrite_small_data_1 (rtx *loc, void *data) +{ + if (arc_rewrite_small_data_p (*loc)) + { + rtx top; + + gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM); + *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc); + if (loc == data) + return -1; + top = *(rtx*) data; + if (GET_CODE (top) == MEM && &XEXP (top, 0) == loc) + ; /* OK. */ + else if (GET_CODE (top) == MEM + && GET_CODE (XEXP (top, 0)) == PLUS + && GET_CODE (XEXP (XEXP (top, 0), 0)) == MULT) + *loc = force_reg (Pmode, *loc); + else + gcc_unreachable (); + return -1; + } + + if (GET_CODE (*loc) == PLUS + && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx)) + return -1; + + return 0; +} + +/* If possible, rewrite OP so that it refers to small data using + explicit relocations. */ + +rtx +arc_rewrite_small_data (rtx op) +{ + op = copy_insn (op); + for_each_rtx (&op, arc_rewrite_small_data_1, &op); + return op; +} + +/* A for_each_rtx callback for small_data_pattern. */ + +static int +small_data_pattern_1 (rtx *loc, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*loc) == PLUS + && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx)) + return -1; + + return arc_rewrite_small_data_p (*loc); +} + +/* Return true if OP refers to small data symbols directly, not through + a PLUS. */ + +bool +small_data_pattern (rtx op, enum machine_mode) +{ + return (GET_CODE (op) != SEQUENCE + && for_each_rtx (&op, small_data_pattern_1, 0)); +} + +/* Return true if OP is an acceptable memory operand for ARCompact + 16-bit gp-relative load instructions. + op shd look like : [r26, symref@sda] + i.e. (mem (plus (reg 26) (symref with smalldata flag set)) + */ +/* volatile cache option still to be handled. 
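Taken together, the checks above admit only small, writable, non-weak globals. A hypothetical translation unit, for illustration only and assuming the small-data support is enabled (TARGET_NO_SDATA_SET not in effect):

    int counter;                          /* 4 bytes, writable: small-data candidate.    */
    long long big_counter;                /* 8 bytes: larger than the 4-byte cap.        */
    const int table[4] = { 1, 2, 3, 4 };  /* read-only: kept out so it stays in ROM.     */
    volatile int status;                  /* volatile: ld.di has no gp-relative variant. */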
*/ + +bool +compact_sda_memory_operand (rtx op, enum machine_mode mode) +{ + rtx addr; + int size; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return false; + + /* Decode the address now. */ + addr = XEXP (op, 0); + + return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr); +} + +/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. */ + +void +arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name, + unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT align, + unsigned HOST_WIDE_INT globalize_p) +{ + int in_small_data = arc_in_small_data_p (decl); + + if (in_small_data) + switch_to_section (get_named_section (NULL, ".sbss", 0)); + /* named_section (0,".sbss",0); */ + else + switch_to_section (bss_section); + + if (globalize_p) + (*targetm.asm_out.globalize_label) (stream, name); + + ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT)); + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); + ASM_OUTPUT_LABEL (stream, name); + + if (size != 0) + ASM_OUTPUT_SKIP (stream, size); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/* SIMD builtins support. */ +enum simd_insn_args_type { + Va_Vb_Vc, + Va_Vb_rlimm, + Va_Vb_Ic, + Va_Vb_u6, + Va_Vb_u8, + Va_rlimm_u8, + + Va_Vb, + + void_rlimm, + void_u6, + + Da_u3_rlimm, + Da_rlimm_rlimm, + + Va_Ib_u8, + void_Va_Ib_u8, + + Va_Vb_Ic_u8, + void_Va_u3_Ib_u8 +}; + +struct builtin_description +{ + enum simd_insn_args_type args_type; + const enum insn_code icode; + const char * const name; + const enum arc_builtins code; +}; + +static const struct builtin_description arc_simd_builtin_desc_list[] = +{ + /* VVV builtins go first. 
*/ +#define SIMD_BUILTIN(type, code, string, builtin) \ + { type,CODE_FOR_##code, "__builtin_arc_" string, \ + ARC_SIMD_BUILTIN_##builtin }, + + SIMD_BUILTIN (Va_Vb_Vc, vaddaw_insn, "vaddaw", VADDAW) + SIMD_BUILTIN (Va_Vb_Vc, vaddw_insn, "vaddw", VADDW) + SIMD_BUILTIN (Va_Vb_Vc, vavb_insn, "vavb", VAVB) + SIMD_BUILTIN (Va_Vb_Vc, vavrb_insn, "vavrb", VAVRB) + SIMD_BUILTIN (Va_Vb_Vc, vdifaw_insn, "vdifaw", VDIFAW) + SIMD_BUILTIN (Va_Vb_Vc, vdifw_insn, "vdifw", VDIFW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxaw_insn, "vmaxaw", VMAXAW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxw_insn, "vmaxw", VMAXW) + SIMD_BUILTIN (Va_Vb_Vc, vminaw_insn, "vminaw", VMINAW) + SIMD_BUILTIN (Va_Vb_Vc, vminw_insn, "vminw", VMINW) + SIMD_BUILTIN (Va_Vb_Vc, vmulaw_insn, "vmulaw", VMULAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfaw_insn, "vmulfaw", VMULFAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfw_insn, "vmulfw", VMULFW) + SIMD_BUILTIN (Va_Vb_Vc, vmulw_insn, "vmulw", VMULW) + SIMD_BUILTIN (Va_Vb_Vc, vsubaw_insn, "vsubaw", VSUBAW) + SIMD_BUILTIN (Va_Vb_Vc, vsubw_insn, "vsubw", VSUBW) + SIMD_BUILTIN (Va_Vb_Vc, vsummw_insn, "vsummw", VSUMMW) + SIMD_BUILTIN (Va_Vb_Vc, vand_insn, "vand", VAND) + SIMD_BUILTIN (Va_Vb_Vc, vandaw_insn, "vandaw", VANDAW) + SIMD_BUILTIN (Va_Vb_Vc, vbic_insn, "vbic", VBIC) + SIMD_BUILTIN (Va_Vb_Vc, vbicaw_insn, "vbicaw", VBICAW) + SIMD_BUILTIN (Va_Vb_Vc, vor_insn, "vor", VOR) + SIMD_BUILTIN (Va_Vb_Vc, vxor_insn, "vxor", VXOR) + SIMD_BUILTIN (Va_Vb_Vc, vxoraw_insn, "vxoraw", VXORAW) + SIMD_BUILTIN (Va_Vb_Vc, veqw_insn, "veqw", VEQW) + SIMD_BUILTIN (Va_Vb_Vc, vlew_insn, "vlew", VLEW) + SIMD_BUILTIN (Va_Vb_Vc, vltw_insn, "vltw", VLTW) + SIMD_BUILTIN (Va_Vb_Vc, vnew_insn, "vnew", VNEW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1aw_insn, "vmr1aw", VMR1AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1w_insn, "vmr1w", VMR1W) + SIMD_BUILTIN (Va_Vb_Vc, vmr2aw_insn, "vmr2aw", VMR2AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr2w_insn, "vmr2w", VMR2W) + SIMD_BUILTIN (Va_Vb_Vc, vmr3aw_insn, "vmr3aw", VMR3AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr3w_insn, "vmr3w", VMR3W) + SIMD_BUILTIN (Va_Vb_Vc, vmr4aw_insn, "vmr4aw", VMR4AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr4w_insn, "vmr4w", VMR4W) + SIMD_BUILTIN (Va_Vb_Vc, vmr5aw_insn, "vmr5aw", VMR5AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr5w_insn, "vmr5w", VMR5W) + SIMD_BUILTIN (Va_Vb_Vc, vmr6aw_insn, "vmr6aw", VMR6AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr6w_insn, "vmr6w", VMR6W) + SIMD_BUILTIN (Va_Vb_Vc, vmr7aw_insn, "vmr7aw", VMR7AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr7w_insn, "vmr7w", VMR7W) + SIMD_BUILTIN (Va_Vb_Vc, vmrb_insn, "vmrb", VMRB) + SIMD_BUILTIN (Va_Vb_Vc, vh264f_insn, "vh264f", VH264F) + SIMD_BUILTIN (Va_Vb_Vc, vh264ft_insn, "vh264ft", VH264FT) + SIMD_BUILTIN (Va_Vb_Vc, vh264fw_insn, "vh264fw", VH264FW) + SIMD_BUILTIN (Va_Vb_Vc, vvc1f_insn, "vvc1f", VVC1F) + SIMD_BUILTIN (Va_Vb_Vc, vvc1ft_insn, "vvc1ft", VVC1FT) + + SIMD_BUILTIN (Va_Vb_rlimm, vbaddw_insn, "vbaddw", VBADDW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmaxw_insn, "vbmaxw", VBMAXW) + SIMD_BUILTIN (Va_Vb_rlimm, vbminw_insn, "vbminw", VBMINW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulaw_insn, "vbmulaw", VBMULAW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulfw_insn, "vbmulfw", VBMULFW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulw_insn, "vbmulw", VBMULW) + SIMD_BUILTIN (Va_Vb_rlimm, vbrsubw_insn, "vbrsubw", VBRSUBW) + SIMD_BUILTIN (Va_Vb_rlimm, vbsubw_insn, "vbsubw", VBSUBW) + + /* Va, Vb, Ic instructions. */ + SIMD_BUILTIN (Va_Vb_Ic, vasrw_insn, "vasrw", VASRW) + SIMD_BUILTIN (Va_Vb_Ic, vsr8_insn, "vsr8", VSR8) + SIMD_BUILTIN (Va_Vb_Ic, vsr8aw_insn, "vsr8aw", VSR8AW) + + /* Va, Vb, u6 instructions. 
*/ + SIMD_BUILTIN (Va_Vb_u6, vasrrwi_insn, "vasrrwi", VASRRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrsrwi_insn, "vasrsrwi", VASRSRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrwi_insn, "vasrwi", VASRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrpwbi_insn, "vasrpwbi", VASRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vasrrpwbi_insn,"vasrrpwbi", VASRRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vsr8awi_insn, "vsr8awi", VSR8AWi) + SIMD_BUILTIN (Va_Vb_u6, vsr8i_insn, "vsr8i", VSR8i) + + /* Va, Vb, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_Vb_u8, vmvaw_insn, "vmvaw", VMVAW) + SIMD_BUILTIN (Va_Vb_u8, vmvw_insn, "vmvw", VMVW) + SIMD_BUILTIN (Va_Vb_u8, vmvzw_insn, "vmvzw", VMVZW) + SIMD_BUILTIN (Va_Vb_u8, vd6tapf_insn, "vd6tapf", VD6TAPF) + + /* Va, rlimm, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_rlimm_u8, vmovaw_insn, "vmovaw", VMOVAW) + SIMD_BUILTIN (Va_rlimm_u8, vmovw_insn, "vmovw", VMOVW) + SIMD_BUILTIN (Va_rlimm_u8, vmovzw_insn, "vmovzw", VMOVZW) + + /* Va, Vb instructions. */ + SIMD_BUILTIN (Va_Vb, vabsaw_insn, "vabsaw", VABSAW) + SIMD_BUILTIN (Va_Vb, vabsw_insn, "vabsw", VABSW) + SIMD_BUILTIN (Va_Vb, vaddsuw_insn, "vaddsuw", VADDSUW) + SIMD_BUILTIN (Va_Vb, vsignw_insn, "vsignw", VSIGNW) + SIMD_BUILTIN (Va_Vb, vexch1_insn, "vexch1", VEXCH1) + SIMD_BUILTIN (Va_Vb, vexch2_insn, "vexch2", VEXCH2) + SIMD_BUILTIN (Va_Vb, vexch4_insn, "vexch4", VEXCH4) + SIMD_BUILTIN (Va_Vb, vupbaw_insn, "vupbaw", VUPBAW) + SIMD_BUILTIN (Va_Vb, vupbw_insn, "vupbw", VUPBW) + SIMD_BUILTIN (Va_Vb, vupsbaw_insn, "vupsbaw", VUPSBAW) + SIMD_BUILTIN (Va_Vb, vupsbw_insn, "vupsbw", VUPSBW) + + /* DIb, rlimm, rlimm instructions. */ + SIMD_BUILTIN (Da_rlimm_rlimm, vdirun_insn, "vdirun", VDIRUN) + SIMD_BUILTIN (Da_rlimm_rlimm, vdorun_insn, "vdorun", VDORUN) + + /* DIb, limm, rlimm instructions. */ + SIMD_BUILTIN (Da_u3_rlimm, vdiwr_insn, "vdiwr", VDIWR) + SIMD_BUILTIN (Da_u3_rlimm, vdowr_insn, "vdowr", VDOWR) + + /* rlimm instructions. */ + SIMD_BUILTIN (void_rlimm, vrec_insn, "vrec", VREC) + SIMD_BUILTIN (void_rlimm, vrun_insn, "vrun", VRUN) + SIMD_BUILTIN (void_rlimm, vrecrun_insn, "vrecrun", VRECRUN) + SIMD_BUILTIN (void_rlimm, vendrec_insn, "vendrec", VENDREC) + + /* Va, [Ib,u8] instructions. */ + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wh_insn, "vld32wh", VLD32WH) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wl_insn, "vld32wl", VLD32WL) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld64_insn, "vld64", VLD64) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32_insn, "vld32", VLD32) + + SIMD_BUILTIN (Va_Ib_u8, vld64w_insn, "vld64w", VLD64W) + SIMD_BUILTIN (Va_Ib_u8, vld128_insn, "vld128", VLD128) + SIMD_BUILTIN (void_Va_Ib_u8, vst128_insn, "vst128", VST128) + SIMD_BUILTIN (void_Va_Ib_u8, vst64_insn, "vst64", VST64) + + /* Va, [Ib, u8] instructions. 
*/ + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst16_n_insn, "vst16_n", VST16_N) + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst32_n_insn, "vst32_n", VST32_N) + + SIMD_BUILTIN (void_u6, vinti_insn, "vinti", VINTI) +}; + +static void +arc_init_simd_builtins (void) +{ + int i; + tree endlink = void_list_node; + tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); + + tree v8hi_ftype_v8hi_v8hi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + endlink))); + tree v8hi_ftype_v8hi_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree v8hi_ftype_v8hi_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int_int + = (build_function_type + (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))))); + + tree v8hi_ftype_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree v8hi_ftype_v8hi + = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node, + endlink)); + + /* These asserts have been introduced to ensure that the order of builtins + does not get messed up, else the initialization goes wrong. 
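For reference, this is roughly how one of the Va_Vb_Vc builtins registered from the table above could be used from C. The v8hi typedef is an assumption matching the V8HImode vector type the initializer builds, and the example only compiles with an ARC toolchain that has the SIMD extension enabled.

    /* Eight 16-bit lanes, matching V8HImode (16 bytes).  */
    typedef short v8hi __attribute__ ((vector_size (16)));

    v8hi
    vaddw_demo (v8hi a, v8hi b)
    {
      /* Expands via ARC_SIMD_BUILTIN_VADDW to the vaddw_insn pattern.  */
      return __builtin_arc_vaddw (a, b);
    }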
*/ + gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc); + for (i=0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert 
(arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_v8hi_int_int_int, + arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6); + for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert(i == ARRAY_SIZE (arc_simd_builtin_desc_list)); +} + +/* Helper function of arc_expand_builtin; has the same parameters, + except that EXP is now known to be a call to a simd builtin. */ + +static rtx +arc_expand_simd_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + tree arg3; + rtx op0; + rtx op1; + rtx op2; + rtx op3; + rtx op4; + rtx pat; + unsigned int i; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + enum machine_mode mode3; + enum machine_mode mode4; + const struct builtin_description * d; + + for (i = 0, d = arc_simd_builtin_desc_list; + i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++) + if (d->code == (const enum arc_builtins) fcode) + break; + + /* We must get an entry here. */ + gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list)); + + switch (d->args_type) + { + case Va_Vb_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_u6: + case Va_Vb_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1) + || (d->args_type == Va_Vb_u6 && !UNSIGNED_INT6 (INTVAL (op1))) + || (d->args_type == Va_Vb_u8 && !UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned %d-bit value", + d->name, + (d->args_type == Va_Vb_u6)? 6: 8); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + + case Va_rlimm_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Ic: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Vc: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Da_rlimm_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (icode == CODE_FOR_vdirun_insn) + target = gen_rtx_REG (SImode, 131); + else if (icode == CODE_FOR_vdorun_insn) + target = gen_rtx_REG (SImode, 139); + else + gcc_unreachable (); + + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Da_u3_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (! (GET_CODE (op0) == CONST_INT) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7)", + d->name); + + mode1 = insn_data[icode].operand[1].mode; + + if (icode == CODE_FOR_vdiwr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0)); + else if (icode == CODE_FOR_vdowr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0)); + else + gcc_unreachable (); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op1); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_u6: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + /* op0 should be u6. */ + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0) + || !(UNSIGNED_INT6 (INTVAL (op0)))) + error ("operand of %s instruction should be an unsigned 6-bit value", + d->name); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Vb_Ic_u8: + { + rtx src_vreg; + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + + /* target <- src vreg */ + emit_insn (gen_move_insn (target, src_vreg)); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[3].mode; + mode1 = insn_data[icode].operand[1].mode; + + if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0)) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op1, op2, op0); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + } + + case void_Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); /* Vdest */ + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 8-bit value", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + pat = GEN_FCN (icode) (op0, op1, op2, op3); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + + /* target <- src vreg */ + target = gen_reg_rtx (V8HImode); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case void_Va_u3_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */ + arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */ + + op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);/* vreg to be stored */ + op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* vreg 0-7 subreg no. */ + + mode0 = insn_data[icode].operand[0].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + /* Do some correctness checks for the operands. 
*/ + if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) + || !(UNSIGNED_INT8 (INTVAL (op0)))) + error ("operand 4 of %s instruction should be an unsigned 8-bit value (0-255)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT3 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4)) + || !(UNSIGNED_INT3 (INTVAL (op4)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7)", + d->name); + else if (icode == CODE_FOR_vst32_n_insn + && ((INTVAL(op4) % 2 ) != 0)) + error ("operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6)", + d->name); + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + default: + gcc_unreachable (); + } + return NULL_RTX; +} + +static bool +arc_preserve_reload_p (rtx in) +{ + return (GET_CODE (in) == PLUS + && RTX_OK_FOR_BASE_P (XEXP (in, 0), true) + && CONST_INT_P (XEXP (in, 1)) + && !((INTVAL (XEXP (in, 1)) & 511))); +} + +int +arc_register_move_cost (enum machine_mode, + enum reg_class from_class, enum reg_class to_class) +{ + /* The ARC600 has no bypass for extension registers, hence a nop might be + needed to be inserted after a write so that reads are safe. */ + if (TARGET_ARC600) + { + if (to_class == MPY_WRITABLE_CORE_REGS) + return 3; + /* Instructions modifying LP_COUNT need 4 additional cycles before + the register will actually contain the value. */ + else if (to_class == LPCOUNT_REG) + return 6; + else if (to_class == WRITABLE_CORE_REGS) + return 6; + } + + /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */ + if (TARGET_ARC700 + && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS + || from_class == WRITABLE_CORE_REGS)) + return 8; + + /* Force an attempt to 'mov Dy,Dx' to spill. */ + if (TARGET_ARC700 && TARGET_DPFP + && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS) + return 100; + + return 2; +} + +/* Emit code for an addsi3 instruction with OPERANDS. + COND_P indicates if this will use conditional execution. + Return the length of the instruction. + If OUTPUT_P is false, don't actually output the instruction, just return + its length. */ +int +arc_output_addsi (rtx *operands, bool cond_p, bool output_p) +{ + char format[32]; + + int match = operands_match_p (operands[0], operands[1]); + int match2 = operands_match_p (operands[0], operands[2]); + int intval = (REG_P (operands[2]) ? 1 + : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); + int neg_intval = -intval; + int short_0 = satisfies_constraint_Rcq (operands[0]); + int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); + int ret = 0; + +#define ADDSI_OUTPUT1(FORMAT) do {\ + if (output_p) \ + output_asm_insn (FORMAT, operands);\ + return ret; \ +} while (0) +#define ADDSI_OUTPUT(LIST) do {\ + if (output_p) \ + sprintf LIST;\ + ADDSI_OUTPUT1 (format);\ + return ret; \ +} while (0) + + /* First try to emit a 16 bit insn. */ + ret = 2; + if (!cond_p + /* If we are actually about to output this insn, don't try a 16 bit + variant if we already decided that we don't want that + (I.e. we upsized this insn to align some following insn.) + E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM - + but add1 r0,sp,35 doesn't. 
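The add1/add2/add3 trick mentioned at the end of the comment above relies on the constant's lowest set bit; here is a standalone restatement of the scaling computation the 32-bit path below uses. The helper name and the worked example are illustrative, and intval is assumed not to be INT_MIN.

    #include <assert.h>

    /* Mirror of the shift selection in arc_output_addsi: range_factor
       isolates the lowest set bit of the constant, and the scaled
       immediate intval >> shift must then fit the base add/sub range.
       E.g. 140 has lowest set bit 4, so shift = 2 and the insn can be
       "add2 dst,src,35" (35 << 2 == 140).  */
    static int
    addsi_scale (int intval)
    {
      int neg_intval = -intval;
      int range_factor = neg_intval & intval;  /* lowest set bit */
      int shift = range_factor >= 8 ? 3 : (range_factor >> 1);

      assert ((((1 << shift) - 1) & intval) == 0);
      return shift;
    }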
*/ + && (!output_p || (get_attr_length (current_output_insn) & 2))) + { + if (short_p + && (REG_P (operands[2]) + ? (match || satisfies_constraint_Rcq (operands[2])) + : (unsigned) intval <= (match ? 127 : 7))) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + if (short_0 && REG_P (operands[1]) && match2) + ADDSI_OUTPUT1 ("add%? %0,%2,%1"); + if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM) + && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124)) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + + if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7)) + || (REGNO (operands[0]) == STACK_POINTER_REGNUM + && match && !(neg_intval & ~124))) + ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); + } + + /* Now try to emit a 32 bit insn without long immediate. */ + ret = 4; + if (!match && match2 && REG_P (operands[1])) + ADDSI_OUTPUT1 ("add%? %0,%2,%1"); + if (match || !cond_p) + { + int limit = (match && !cond_p) ? 0x7ff : 0x3f; + int range_factor = neg_intval & intval; + int shift; + + if (intval == -1 << 31) + ADDSI_OUTPUT1 ("bxor%? %0,%1,31"); + + /* If we can use a straight add / sub instead of a {add,sub}[123] of + same size, do, so - the insn latency is lower. */ + /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but + 0x800 is not. */ + if ((intval >= 0 && intval <= limit) + || (intval == -0x800 && limit == 0x7ff)) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + else if ((intval < 0 && neg_intval <= limit) + || (intval == 0x800 && limit == 0x7ff)) + ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); + shift = range_factor >= 8 ? 3 : (range_factor >> 1); + gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3); + gcc_assert ((((1 << shift) - 1) & intval) == 0); + if (((intval < 0 && intval != -0x4000) + /* sub[123] is slower than add_s / sub, only use it if it + avoids a long immediate. */ + && neg_intval <= limit << shift) + || (intval == 0x4000 && limit == 0x7ff)) + ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d", + shift, neg_intval >> shift)); + else if ((intval >= 0 && intval <= limit << shift) + || (intval == -0x4000 && limit == 0x7ff)) + ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift)); + } + /* Try to emit a 16 bit opcode with long immediate. */ + ret = 6; + if (short_p && match) + ADDSI_OUTPUT1 ("add%? %0,%1,%S2"); + + /* We have to use a 32 bit opcode, and with a long immediate. */ + ret = 8; + ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2"); +} + +/* Emit code for an commutative_cond_exec instruction with OPERANDS. + Return the length of the instruction. + If OUTPUT_P is false, don't actually output the instruction, just return + its length. */ +int +arc_output_commutative_cond_exec (rtx *operands, bool output_p) +{ + enum rtx_code commutative_op = GET_CODE (operands[3]); + const char *pat = NULL; + + /* Canonical rtl should not have a constant in the first operand position. */ + gcc_assert (!CONSTANT_P (operands[1])); + + switch (commutative_op) + { + case AND: + if (satisfies_constraint_C1p (operands[2])) + pat = "bmsk%? %0,%1,%Z2"; + else if (satisfies_constraint_Ccp (operands[2])) + pat = "bclr%? %0,%1,%M2"; + else if (satisfies_constraint_CnL (operands[2])) + pat = "bic%? %0,%1,%n2-1"; + break; + case IOR: + if (satisfies_constraint_C0p (operands[2])) + pat = "bset%? %0,%1,%z2"; + break; + case XOR: + if (satisfies_constraint_C0p (operands[2])) + pat = "bxor%? %0,%1,%z2"; + break; + case PLUS: + return arc_output_addsi (operands, true, output_p); + default: break; + } + if (output_p) + output_asm_insn (pat ? 
pat : "%O3.%d5 %0,%1,%2", operands); + if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2])) + return 4; + return 8; +} + +/* Helper function of arc_expand_movmem. ADDR points to a chunk of memory. + Emit code and return an potentially modified address such that offsets + up to SIZE are can be added to yield a legitimate address. + if REUSE is set, ADDR is a register that may be modified. */ + +static rtx +force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) +{ + rtx base = addr; + rtx offs = const0_rtx; + + if (GET_CODE (base) == PLUS) + { + offs = XEXP (base, 1); + base = XEXP (base, 0); + } + if (!REG_P (base) + || (REGNO (base) != STACK_POINTER_REGNUM + && REGNO_PTR_FRAME_P (REGNO (addr))) + || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs)) + || !SMALL_INT (INTVAL (offs) + size)) + { + if (reuse) + emit_insn (gen_add2_insn (addr, offs)); + else + addr = copy_to_mode_reg (Pmode, addr); + } + return addr; +} + +/* Like move_by_pieces, but take account of load latency, + and actual offset ranges. + Return true on success. */ + +bool +arc_expand_movmem (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[1]; + rtx dst_addr, src_addr; + HOST_WIDE_INT size; + int align = INTVAL (operands[3]); + unsigned n_pieces; + int piece = align; + rtx store[2]; + rtx tmpx[2]; + int i; + + if (!CONST_INT_P (operands[2])) + return false; + size = INTVAL (operands[2]); + /* move_by_pieces_ninsns is static, so we can't use it. */ + if (align >= 4) + n_pieces = (size + 2) / 4U + (size & 1); + else if (align == 2) + n_pieces = (size + 1) / 2U; + else + n_pieces = size; + if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15)) + return false; + if (piece > 4) + piece = 4; + dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0); + src_addr = force_offsettable (XEXP (operands[1], 0), size, 0); + store[0] = store[1] = NULL_RTX; + tmpx[0] = tmpx[1] = NULL_RTX; + for (i = 0; size > 0; i ^= 1, size -= piece) + { + rtx tmp; + enum machine_mode mode; + + if (piece > size) + piece = size & -size; + mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT); + /* If we don't re-use temporaries, the scheduler gets carried away, + and the register pressure gets unnecessarily high. */ + if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode) + tmp = tmpx[i]; + else + tmpx[i] = tmp = gen_reg_rtx (mode); + dst_addr = force_offsettable (dst_addr, piece, 1); + src_addr = force_offsettable (src_addr, piece, 1); + if (store[i]) + emit_insn (store[i]); + emit_move_insn (tmp, change_address (src, mode, src_addr)); + store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp); + dst_addr = plus_constant (Pmode, dst_addr, piece); + src_addr = plus_constant (Pmode, src_addr, piece); + } + if (store[i]) + emit_insn (store[i]); + if (store[i^1]) + emit_insn (store[i^1]); + return true; +} + +/* Prepare operands for move in MODE. Return true iff the move has + been emitted. */ + +bool +prepare_move_operands (rtx *operands, enum machine_mode mode) +{ + /* We used to do this only for MODE_INT Modes, but addresses to floating + point variables may well be in the small data section. */ + if (1) + { + if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode)) + operands[0] = arc_rewrite_small_data (operands[0]); + else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1])) + { + emit_pic_move (operands, SImode); + + /* Disable any REG_EQUALs associated with the symref + otherwise the optimization pass undoes the work done + here and references the variable directly. 
*/ + } + else if (GET_CODE (operands[0]) != MEM + && !TARGET_NO_SDATA_SET + && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] = arc_rewrite_small_data (operands[1]); + + emit_insn (gen_rtx_SET (mode, operands[0],operands[1])); + /* ??? This note is useless, since it only restates the set itself. + We should rather use the original SYMBOL_REF. However, there is + the problem that we are lying to the compiler about these + SYMBOL_REFs to start with. symbol@sda should be encoded specially + so that we can tell it apart from an actual symbol. */ + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial symbol_ref after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + } + + if (MEM_P (operands[0]) + && !(reload_in_progress || reload_completed)) + { + operands[1] = force_reg (mode, operands[1]); + if (!move_dest_operand (operands[0], mode)) + { + rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + /* This is like change_address_1 (operands[0], mode, 0, 1) , + except that we can't use that function because it is static. */ + rtx pat = change_address (operands[0], mode, addr); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + if (!cse_not_expected) + { + rtx pat = XEXP (operands[0], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[0], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + } + } + + if (MEM_P (operands[1]) && !cse_not_expected) + { + rtx pat = XEXP (operands[1], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[1], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[1]); + operands[1] = pat; + } + } + + return false; +} + +/* Prepare OPERANDS for an extension using CODE to OMODE. + Return true iff the move has been emitted. */ + +bool +prepare_extend_operands (rtx *operands, enum rtx_code code, + enum machine_mode omode) +{ + if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] + = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1])); + emit_insn (gen_rtx_SET (omode, operands[0], operands[1])); + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial extension after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + return false; +} + +/* Output a library call to a function called FNAME that has been arranged + to be local to any dso. */ + +const char * +arc_output_libcall (const char *fname) +{ + unsigned len = strlen (fname); + static char buf[64]; + + gcc_assert (len < sizeof buf - 35); + if (TARGET_LONG_CALLS_SET + || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ())) + { + if (flag_pic) + sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname); + else + sprintf (buf, "jl%%! @%s", fname); + } + else + sprintf (buf, "bl%%!%%* @%s", fname); + return buf; +} + +/* Return the SImode highpart of the DImode value IN. */ + +rtx +disi_highpart (rtx in) +{ + return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 
0 : 4); +} + +/* Called by arc600_corereg_hazard via for_each_rtx. + If a hazard is found, return a conservative estimate of the required + length adjustment to accomodate a nop. */ + +static int +arc600_corereg_hazard_1 (rtx *xp, void *data) +{ + rtx x = *xp; + rtx dest; + rtx pat = (rtx) data; + + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */ + return 0; + } + dest = XEXP (x, 0); + /* Check if this sets a an extension register. N.B. we use 61 for the + condition codes, which is definitely not an extension register. */ + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 + /* Check if the same register is used by the PAT. */ + && (refers_to_regno_p + (REGNO (dest), + REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, pat, 0))) + return 4; + + return 0; +} + +/* Return length adjustment for INSN. + For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +static int +arc600_corereg_hazard (rtx pred, rtx succ) +{ + if (!TARGET_ARC600) + return 0; + /* If SUCC is a doloop_end_i with a preceding label, we must output a nop + in front of SUCC anyway, so there will be separation between PRED and + SUCC. */ + if (recog_memoized (succ) == CODE_FOR_doloop_end_i + && LABEL_P (prev_nonnote_insn (succ))) + return 0; + if (recog_memoized (succ) == CODE_FOR_doloop_begin_i) + return 0; + if (GET_CODE (PATTERN (pred)) == SEQUENCE) + pred = XVECEXP (PATTERN (pred), 0, 1); + if (GET_CODE (PATTERN (succ)) == SEQUENCE) + succ = XVECEXP (PATTERN (succ), 0, 0); + if (recog_memoized (pred) == CODE_FOR_mulsi_600 + || recog_memoized (pred) == CODE_FOR_umul_600 + || recog_memoized (pred) == CODE_FOR_mac_600 + || recog_memoized (pred) == CODE_FOR_mul64_600 + || recog_memoized (pred) == CODE_FOR_mac64_600 + || recog_memoized (pred) == CODE_FOR_umul64_600 + || recog_memoized (pred) == CODE_FOR_umac64_600) + return 0; + return for_each_rtx (&PATTERN (pred), arc600_corereg_hazard_1, + PATTERN (succ)); +} + +/* For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +int +arc_hazard (rtx pred, rtx succ) +{ + if (!TARGET_ARC600) + return 0; + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + /* We might have a CALL to a non-returning function before a loop end. + ??? Although the manual says that's OK (the target is outside the loop, + and the loop counter unused there), the assembler barfs on this, so we + must instert a nop before such a call too. */ + if (recog_memoized (succ) == CODE_FOR_doloop_end_i + && (JUMP_P (pred) || CALL_P (pred) + || GET_CODE (PATTERN (pred)) == SEQUENCE)) + return 4; + return arc600_corereg_hazard (pred, succ); +} + +/* Return length adjustment for INSN. */ + +int +arc_adjust_insn_length (rtx insn, int len, bool) +{ + if (!INSN_P (insn)) + return len; + /* We already handle sequences by ignoring the delay sequence flag. */ + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + return len; + + /* It is impossible to jump to the very end of a Zero-Overhead Loop, as + the ZOL mechanism only triggers when advancing to the end address, + so if there's a label at the end of a ZOL, we need to insert a nop. 
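A small restatement of the register test driving the ARC600 hazard check above (the helper name is mine):

    #include <stdbool.h>

    /* ARC600 extension core registers are r32..r60; register 61 holds
       the condition codes, which is why the range stops below it.  A
       write to one of these followed immediately by a read needs an
       intervening nop, hence the 4-byte length adjustment above.  */
    static bool
    arc600_ext_core_reg_p (unsigned int regno)
    {
      return regno >= 32 && regno < 61;
    }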
+ The ARC600 ZOL also has extra restrictions on jumps at the end of a + loop. */ + if (recog_memoized (insn) == CODE_FOR_doloop_end_i) + { + rtx prev = prev_nonnote_insn (insn); + + return ((LABEL_P (prev) + || (TARGET_ARC600 + && (JUMP_P (prev) + || CALL_P (prev) /* Could be a noreturn call. */ + || (NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == SEQUENCE)))) + ? len + 4 : len); + } + + /* Check for return with but one preceding insn since function + start / call. */ + if (TARGET_PAD_RETURN + && JUMP_P (insn) + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && get_attr_type (insn) == TYPE_RETURN) + { + rtx prev = prev_active_insn (insn); + + if (!prev || !(prev = prev_active_insn (prev)) + || ((NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : CALL_ATTR (prev, NON_SIBCALL))) + return len + 4; + } + if (TARGET_ARC600) + { + rtx succ = next_real_insn (insn); + + /* One the ARC600, a write to an extension register must be separated + from a read. */ + if (succ && INSN_P (succ)) + len += arc600_corereg_hazard (insn, succ); + } + + /* Restore extracted operands - otherwise splitters like the addsi3_mixed one + can go awry. */ + extract_constrain_insn_cached (insn); + + return len; +} + +/* Values for length_sensitive. */ +enum +{ + ARC_LS_NONE,// Jcc + ARC_LS_25, // 25 bit offset, B + ARC_LS_21, // 21 bit offset, Bcc + ARC_LS_U13,// 13 bit unsigned offset, LP + ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s + ARC_LS_9, // 9 bit offset, BRcc + ARC_LS_8, // 8 bit offset, BRcc_s + ARC_LS_U7, // 7 bit unsigned offset, LPcc + ARC_LS_7 // 7 bit offset, Bcc_s +}; + +/* While the infrastructure patch is waiting for review, duplicate the + struct definitions, to allow this file to compile. */ +#if 1 +typedef struct +{ + unsigned align_set; + /* Cost as a branch / call target or call return address. */ + int target_cost; + int fallthrough_cost; + int branch_cost; + int length; + /* 0 for not length sensitive, 1 for largest offset range, + * 2 for next smaller etc. */ + unsigned length_sensitive : 8; + bool enabled; +} insn_length_variant_t; + +typedef struct insn_length_parameters_s +{ + int align_unit_log; + int align_base_log; + int max_variants; + int (*get_variants) (rtx, int, bool, bool, insn_length_variant_t *); +} insn_length_parameters_t; + +static void +arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED; +#endif + +static int +arc_get_insn_variants (rtx insn, int len, bool, bool target_p, + insn_length_variant_t *ilv) +{ + if (!NONDEBUG_INSN_P (insn)) + return 0; + enum attr_type type; + /* shorten_branches doesn't take optimize_size into account yet for the + get_variants mechanism, so turn this off for now. */ + if (optimize_size) + return 0; + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + { + /* The interaction of a short delay slot insn with a short branch is + too weird for shorten_branches to piece together, so describe the + entire SEQUENCE. */ + rtx pat, inner; + if (TARGET_UPSIZE_DBR + && get_attr_length (XVECEXP ((pat = PATTERN (insn)), 0, 1)) <= 2 + && (((type = get_attr_type (inner = XVECEXP (pat, 0, 0))) + == TYPE_UNCOND_BRANCH) + || type == TYPE_BRANCH) + && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES) + { + int n_variants + = arc_get_insn_variants (inner, get_attr_length (inner), true, + target_p, ilv+1); + /* The short variant gets split into a higher-cost aligned + and a lower cost unaligned variant. 
*/ + gcc_assert (n_variants); + gcc_assert (ilv[1].length_sensitive == ARC_LS_7 + || ilv[1].length_sensitive == ARC_LS_10); + gcc_assert (ilv[1].align_set == 3); + ilv[0] = ilv[1]; + ilv[0].align_set = 1; + ilv[0].branch_cost += 1; + ilv[1].align_set = 2; + n_variants++; + for (int i = 0; i < n_variants; i++) + ilv[i].length += 2; + /* In case an instruction with aligned size is wanted, and + the short variants are unavailable / too expensive, add + versions of long branch + long delay slot. */ + for (int i = 2, end = n_variants; i < end; i++, n_variants++) + { + ilv[n_variants] = ilv[i]; + ilv[n_variants].length += 2; + } + return n_variants; + } + return 0; + } + insn_length_variant_t *first_ilv = ilv; + type = get_attr_type (insn); + bool delay_filled + = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES); + int branch_align_cost = delay_filled ? 0 : 1; + int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1; + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. */ + bool force_target = false; + rtx prev = prev_active_insn (insn); + if (prev && arc_next_active_insn (prev, 0) == insn + && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : (CALL_ATTR (prev, NON_SIBCALL) + && NEXT_INSN (PREV_INSN (prev)) == prev))) + force_target = true; + + switch (type) + { + case TYPE_BRCC: + /* Short BRCC only comes in no-delay-slot version, and without limm */ + if (!delay_filled) + { + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = 1; + ilv->enabled = (len == 2); + ilv->length_sensitive = ARC_LS_8; + ilv++; + } + /* Fall through. */ + case TYPE_BRCC_NO_DELAY_SLOT: + /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for + (delay slot) scheduling purposes, but they are longer. */ + if (GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET) + return 0; + /* Standard BRCC: 4 bytes, or 8 bytes with limm. */ + ilv->length = ((type == TYPE_BRCC) ? 4 : 8); + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len <= ilv->length); + ilv->length_sensitive = ARC_LS_9; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + + rtx op, op0; + op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); + op0 = XEXP (op, 0); + + if (GET_CODE (op0) == ZERO_EXTRACT + && satisfies_constraint_L (XEXP (op0, 2))) + op0 = XEXP (op0, 0); + if (satisfies_constraint_Rcq (op0)) + { + ilv->length = ((type == TYPE_BRCC) ? 6 : 10); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if (!delay_filled && TARGET_UNALIGN_BRANCH) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + } + ilv->length = ((type == TYPE_BRCC) ? 
8 : 12); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + break; + + case TYPE_SFUNC: + ilv->length = 12; + goto do_call; + case TYPE_CALL_NO_DELAY_SLOT: + ilv->length = 8; + goto do_call; + case TYPE_CALL: + ilv->length = 4; + ilv->length_sensitive + = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25; + do_call: + ilv->align_set = 3; + ilv->fallthrough_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->fallthrough_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_UNCOND_BRANCH: + /* Strictly speaking, this should be ARC_LS_10 for equality comparisons, + but that makes no difference at the moment. */ + ilv->length_sensitive = ARC_LS_7; + ilv[1].length_sensitive = ARC_LS_25; + goto do_branch; + case TYPE_BRANCH: + ilv->length_sensitive = ARC_LS_10; + ilv[1].length_sensitive = ARC_LS_21; + do_branch: + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len == ilv->length); + ilv++; + ilv->length = 4; + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_JUMP: + return 0; + default: + /* For every short insn, there is generally also a long insn. + trap_s is an exception. */ + if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s) + return 0; + ilv->align_set = 3; + ilv->length = len; + ilv->enabled = 1; + ilv++; + ilv->align_set = 3; + ilv->length = len + 2; + ilv->enabled = 1; + if (target_p || force_target) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + } + ilv++; + } + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. + Therefore, if we have a call predecessor, transfer the target cost + to the fallthrough and branch costs. */ + if (force_target) + { + for (insn_length_variant_t *p = first_ilv; p < ilv; p++) + { + p->fallthrough_cost += p->target_cost; + p->branch_cost += p->target_cost; + p->target_cost = 0; + } + } + + return ilv - first_ilv; +} + +static void +arc_insn_length_parameters (insn_length_parameters_t *ilp) +{ + ilp->align_unit_log = 1; + ilp->align_base_log = 1; + ilp->max_variants = 7; + ilp->get_variants = arc_get_insn_variants; +} + +/* Return a copy of COND from *STATEP, inverted if that is indicated by the + CC field of *STATEP. 
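(Editor's aside on the insn-length-variant machinery above; this reading is inferred from the code, not stated in the patch.) With align_unit_log == align_base_log == 1, variants are tracked in 2-byte steps within a 4-byte fetch unit, and align_set appears to act as a two-bit mask over the two possible starting positions: 1 for a variant valid only at a 4-byte boundary, 2 for one starting 2 bytes into the unit, 3 for either. The code above repeatedly splits a '3' variant into a '1' and a '2' copy so the two placements can carry different branch and target costs. A hypothetical spelling of that reading, with editor-chosen names:

    enum arc_align_set_guess
    {
      ALIGN_SET_ALIGNED   = 1,
      ALIGN_SET_UNALIGNED = 2,
      ALIGN_SET_EITHER    = 3
    };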
*/
+
+static rtx
+arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
+{
+  rtx cond = statep->cond;
+  int raw_cc = get_arc_condition_code (cond);
+  if (reverse)
+    raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
+
+  if (statep->cc == raw_cc)
+    return copy_rtx (cond);
+
+  gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
+
+  enum machine_mode ccm = GET_MODE (XEXP (cond, 0));
+  enum rtx_code code = reverse_condition (GET_CODE (cond));
+  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
+    code = reverse_condition_maybe_unordered (GET_CODE (cond));
+
+  return gen_rtx_fmt_ee (code, GET_MODE (cond),
+                         copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
+}
+
+/* Return version of PAT conditionalized with COND, which is part of INSN.
+   ANNULLED indicates if INSN is an annulled delay-slot insn.
+   Register further changes if necessary.  */
+static rtx
+conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
+{
+  /* For commutative operators, we generally prefer to have
+     the first source match the destination.  */
+  if (GET_CODE (pat) == SET)
+    {
+      rtx src = SET_SRC (pat);
+
+      if (COMMUTATIVE_P (src))
+        {
+          rtx src0 = XEXP (src, 0);
+          rtx src1 = XEXP (src, 1);
+          rtx dst = SET_DEST (pat);
+
+          if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
+              /* Leave add_n alone - the canonical form is to
+                 have the complex summand first.  */
+              && REG_P (src0))
+            pat = gen_rtx_SET (VOIDmode, dst,
+                               gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
+                                               src1, src0));
+        }
+    }
+
+  /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
+     what to do with COND_EXEC.  */
+  if (RTX_FRAME_RELATED_P (insn))
+    {
+      /* If this is the delay slot insn of an annulled branch,
+         dwarf2out.c:scan_trace understands the annulling semantics
+         without the COND_EXEC.  */
+      gcc_assert (annulled);
+      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
+                                 REG_NOTES (insn));
+      validate_change (insn, &REG_NOTES (insn), note, 1);
+    }
+  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
+  return pat;
+}
+
+/* Use the ccfsm machinery to do if conversion.  */
+
+static unsigned
+arc_ifcvt (void)
+{
+  struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
+  basic_block merge_bb = 0;
+
+  memset (statep, 0, sizeof *statep);
+  for (rtx insn = get_insns (); insn; insn = next_insn (insn))
+    {
+      arc_ccfsm_advance (insn, statep);
+
+      switch (statep->state)
+        {
+        case 0:
+          if (JUMP_P (insn))
+            merge_bb = 0;
+          break;
+        case 1: case 2:
+          {
+            /* Deleted branch.
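A hypothetical picture of this case (editor's illustration, not RTL from the patch): the ccfsm machinery has matched a conditional branch that skips a small block; the branch itself is deleted here, and the insns it used to guard are given a COND_EXEC predicate when the scan reaches them in states 4-5. Schematically,

    (jump_insn (set (pc) (if_then_else (ne cc 0) (label_ref L) (pc))))
    (insn      (set (reg r0) (const_int 1)))
    L:

ends up as

    (insn (cond_exec (eq cc 0) (set (reg r0) (const_int 1))))

i.e. the skipped insn executes exactly when the branch would not have been taken.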
*/ + gcc_assert (!merge_bb); + merge_bb = BLOCK_FOR_INSN (insn); + basic_block succ_bb + = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn)))); + arc_ccfsm_post_advance (insn, statep); + gcc_assert (!IN_RANGE (statep->state, 1, 2)); + rtx seq = NEXT_INSN (PREV_INSN (insn)); + if (seq != insn) + { + rtx slot = XVECEXP (PATTERN (seq), 0, 1); + rtx pat = PATTERN (slot); + if (INSN_ANNULLED_BRANCH_P (insn)) + { + rtx cond + = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot)); + pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat); + } + if (!validate_change (seq, &PATTERN (seq), pat, 0)) + gcc_unreachable (); + PUT_CODE (slot, NOTE); + NOTE_KIND (slot) = NOTE_INSN_DELETED; + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + } + else if (merge_bb && succ_bb) + { + set_insn_deleted (insn); + merge_blocks (merge_bb, succ_bb); + } + else + { + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED; + } + continue; + } + case 3: + if (LABEL_P (insn) + && statep->target_label == CODE_LABEL_NUMBER (insn)) + { + arc_ccfsm_post_advance (insn, statep); + basic_block succ_bb = BLOCK_FOR_INSN (insn); + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + else if (--LABEL_NUSES (insn) == 0) + { + const char *name = LABEL_NAME (insn); + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL; + NOTE_DELETED_LABEL_NAME (insn) = name; + } + merge_bb = 0; + continue; + } + /* Fall through. */ + case 4: case 5: + if (!NONDEBUG_INSN_P (insn)) + break; + + /* Conditionalized insn. */ + + rtx prev, pprev, *patp, pat, cond; + bool annulled; annulled = false; + + /* If this is a delay slot insn in a non-annulled branch, + don't conditionalize it. N.B., this should be fine for + conditional return too. However, don't do this for + unconditional branches, as these would be encountered when + processing an 'else' part. */ + prev = PREV_INSN (insn); + pprev = PREV_INSN (prev); + if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn) + && JUMP_P (prev) && get_attr_cond (prev) == COND_USE) + { + if (!INSN_ANNULLED_BRANCH_P (prev)) + break; + annulled = true; + } + + patp = &PATTERN (insn); + pat = *patp; + cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn)); + if (NONJUMP_INSN_P (insn) || CALL_P (insn)) + { + /* ??? don't conditionalize if all side effects are dead + in the not-execute case. */ + + pat = conditionalize_nonjump (pat, cond, insn, annulled); + } + else if (simplejump_p (insn)) + { + patp = &SET_SRC (pat); + pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx); + } + else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn))) + { + pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx); + pat = gen_rtx_SET (VOIDmode, pc_rtx, pat); + } + else + gcc_unreachable (); + validate_change (insn, patp, pat, 1); + if (!apply_change_group ()) + gcc_unreachable (); + if (JUMP_P (insn)) + { + rtx next = next_nonnote_insn (insn); + if (GET_CODE (next) == BARRIER) + delete_insn (next); + if (statep->state == 3) + continue; + } + break; + default: + gcc_unreachable (); + } + arc_ccfsm_post_advance (insn, statep); + } + return 0; +} + +/* Find annulled delay insns and convert them to use the appropriate predicate. + This allows branch shortening to size up these insns properly. 
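Condensed to its essence (editor's sketch; the function below additionally reverses the condition to match the annulling sense, and conditionalize_nonjump also canonicalizes commutative operands and frame-related notes), the transformation applied to a delay-slot insn DLAY is:

    /* COND is, roughly, the condition under which the slot insn
       actually executes.  */
    pat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (dlay));
    validate_change (dlay, &PATTERN (dlay), pat, 1);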
*/ + +static unsigned +arc_predicate_delay_insns (void) +{ + for (rtx insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat, jump, dlay, src, cond, *patp; + int reverse; + + if (!NONJUMP_INSN_P (insn) + || GET_CODE (pat = PATTERN (insn)) != SEQUENCE) + continue; + jump = XVECEXP (pat, 0, 0); + dlay = XVECEXP (pat, 0, 1); + if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump)) + continue; + /* If the branch insn does the annulling, leave the delay insn alone. */ + if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay)) + continue; + /* ??? Could also leave DLAY un-conditionalized if its target is dead + on the other path. */ + gcc_assert (GET_CODE (PATTERN (jump)) == SET); + gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx); + src = SET_SRC (PATTERN (jump)); + gcc_assert (GET_CODE (src) == IF_THEN_ELSE); + cond = XEXP (src, 0); + if (XEXP (src, 2) == pc_rtx) + reverse = 0; + else if (XEXP (src, 1) == pc_rtx) + reverse = 1; + else + gcc_unreachable (); + if (!INSN_FROM_TARGET_P (dlay) != reverse) + { + enum machine_mode ccm = GET_MODE (XEXP (cond, 0)); + enum rtx_code code = reverse_condition (GET_CODE (cond)); + if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode) + code = reverse_condition_maybe_unordered (GET_CODE (cond)); + + cond = gen_rtx_fmt_ee (code, GET_MODE (cond), + copy_rtx (XEXP (cond, 0)), + copy_rtx (XEXP (cond, 1))); + } + else + cond = copy_rtx (cond); + patp = &PATTERN (dlay); + pat = *patp; + pat = conditionalize_nonjump (pat, cond, dlay, true); + validate_change (dlay, patp, pat, 1); + if (!apply_change_group ()) + gcc_unreachable (); + } + return 0; +} + +/* For ARC600: If a write to a core reg >=32 appears in a delay slot + (other than of a forward brcc), it creates a hazard when there is a read + of the same register at the branch target. We can't know what is at the + branch target of calls, and for branches, we don't really know before the + end of delay slot scheduling, either. Not only can individual instruction + be hoisted out into a delay slot, a basic block can also be emptied this + way, and branch and/or fall through targets be redirected. Hence we don't + want such writes in a delay slot. */ +/* Called by arc_write_ext_corereg via for_each_rtx. */ + +static int +write_ext_corereg_1 (rtx *xp, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *xp; + rtx dest; + + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */ + return 0; + } + dest = XEXP (x, 0); + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61) + return 1; + return 0; +} + +/* Return nonzreo iff INSN writes to an extension core register. */ + +int +arc_write_ext_corereg (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), write_ext_corereg_1, 0); +} + +/* This is like the hook, but returns NULL when it can't / won't generate + a legitimate address. 
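As a worked example of the large-offset splitting done below (editor's illustration; the numbers are made up): for an SImode access, size == 4, so an address sym + 5000 is rewritten via

    upper = (5000 + 256 * 4) & (~511 * 4)  ==  6024 & -2048  ==  4096
    x     = (plus (force_reg (Pmode, sym + 4096)) 904)

and the residual displacement offs - upper always falls in [-1024, 1023] for a 4-byte access.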
*/ + +static rtx +arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx addr, inner; + + if (flag_pic && SYMBOLIC_CONST (x)) + (x) = arc_legitimize_pic_address (x, 0); + addr = x; + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1)) + && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF + && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0))) + || (REG_P (XEXP (addr, 0)) + && (INTVAL (XEXP (addr, 1)) & 252)))) + { + HOST_WIDE_INT offs, upper; + int size = GET_MODE_SIZE (mode); + + offs = INTVAL (XEXP (addr, 1)); + upper = (offs + 256 * size) & ~511 * size; + inner = plus_constant (Pmode, XEXP (addr, 0), upper); +#if 0 /* ??? this produces worse code for EEMBC idctrn01 */ + if (GET_CODE (x) == CONST) + inner = gen_rtx_CONST (Pmode, inner); +#endif + addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper); + x = addr; + } + else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr)) + x = force_reg (Pmode, x); + if (memory_address_p ((enum machine_mode) mode, x)) + return x; + return NULL_RTX; +} + +static rtx +arc_legitimize_address (rtx orig_x, rtx oldx, enum machine_mode mode) +{ + rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode); + + if (new_x) + return new_x; + return orig_x; +} + +static rtx +arc_delegitimize_address_0 (rtx x) +{ + rtx u, gp; + + if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC) + { + if (XINT (u, 1) == ARC_UNSPEC_GOT) + return XVECEXP (u, 0, 0); + } + else if (GET_CODE (x) == PLUS + && ((REG_P (gp = XEXP (x, 0)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return XVECEXP (u, 0, 0); + else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && ((REG_P (gp = XEXP (XEXP (x, 0), 1)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0), + XVECEXP (u, 0, 0)); + else if (GET_CODE (x) == PLUS + && (u = arc_delegitimize_address_0 (XEXP (x, 1)))) + return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u); + return NULL_RTX; +} + +static rtx +arc_delegitimize_address (rtx x) +{ + rtx orig_x = x = delegitimize_mem_from_attrs (x); + if (GET_CODE (x) == MEM) + x = XEXP (x, 0); + x = arc_delegitimize_address_0 (x); + if (x) + { + if (MEM_P (orig_x)) + x = replace_equiv_address_nv (orig_x, x); + return x; + } + return orig_x; +} + +/* Return a REG rtx for acc1. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc1 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57); +} + +/* Return a REG rtx for acc2. N.B. 
the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc2 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56); +} + +/* Return a REG rtx for mlo. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mlo (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58); +} + +/* Return a REG rtx for mhi. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mhi (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59); +} + +/* FIXME: a parameter should be added, and code added to final.c, + to reproduce this functionality in shorten_branches. */ +#if 0 +/* Return nonzero iff BRANCH should be unaligned if possible by upsizing + a previous instruction. */ +int +arc_unalign_branch_p (rtx branch) +{ + rtx note; + + if (!TARGET_UNALIGN_BRANCH) + return 0; + /* Do not do this if we have a filled delay slot. */ + if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES + && !INSN_DELETED_P (NEXT_INSN (branch))) + return 0; + note = find_reg_note (branch, REG_BR_PROB, 0); + return (!note + || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note)) + || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold); +} +#endif + +/* When estimating sizes during arc_reorg, when optimizing for speed, there + are three reasons why we need to consider branches to be length 6: + - annull-false delay slot insns are implemented using conditional execution, + thus preventing short insn formation where used. + - for ARC600: annul-true delay slot insns are implemented where possible + using conditional execution, preventing short insn formation where used. + - for ARC700: likely or somewhat likely taken branches are made long and + unaligned if possible to avoid branch penalty. */ + +bool +arc_branch_size_unknown_p (void) +{ + return !optimize_size && arc_reorg_in_progress; +} + +/* We are about to output a return insn. Add padding if necessary to avoid + a mispredict. A return could happen immediately after the function + start, but after a call we know that there will be at least a blink + restore. */ + +void +arc_pad_return (void) +{ + rtx insn = current_output_insn; + rtx prev = prev_active_insn (insn); + int want_long; + + if (!prev) + { + fputs ("\tnop_s\n", asm_out_file); + cfun->machine->unalign ^= 2; + want_long = 1; + } + /* If PREV is a sequence, we know it must be a branch / jump or a tailcall, + because after a call, we'd have to restore blink first. */ + else if (GET_CODE (PATTERN (prev)) == SEQUENCE) + return; + else + { + want_long = (get_attr_length (prev) == 2); + prev = prev_active_insn (prev); + } + if (!prev + || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : CALL_ATTR (prev, NON_SIBCALL))) + { + if (want_long) + cfun->machine->size_reason + = "call/return and return/return must be 6 bytes apart to avoid mispredict"; + else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign) + { + cfun->machine->size_reason + = "Long unaligned jump avoids non-delay slot penalty"; + want_long = 1; + } + /* Disgorge delay insn, if there is any, and it may be moved. 
*/ + if (final_sequence + /* ??? Annulled would be OK if we can and do conditionalize + the delay slot insn accordingly. */ + && !INSN_ANNULLED_BRANCH_P (insn) + && (get_attr_cond (insn) != COND_USE + || !reg_set_p (gen_rtx_REG (CCmode, CC_REG), + XVECEXP (final_sequence, 0, 1)))) + { + prev = XVECEXP (final_sequence, 0, 1); + gcc_assert (!prev_real_insn (insn) + || !arc_hazard (prev_real_insn (insn), prev)); + cfun->machine->force_short_suffix = !want_long; + rtx save_pred = current_insn_predicate; + final_scan_insn (prev, asm_out_file, optimize, 1, NULL); + cfun->machine->force_short_suffix = -1; + INSN_DELETED_P (prev) = 1; + current_output_insn = insn; + current_insn_predicate = save_pred; + } + else if (want_long) + fputs ("\tnop\n", asm_out_file); + else + { + fputs ("\tnop_s\n", asm_out_file); + cfun->machine->unalign ^= 2; + } + } + return; +} + +/* The usual; we set up our machine_function data. */ + +static struct machine_function * +arc_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + machine->fn_type = ARC_FUNCTION_UNKNOWN; + machine->force_short_suffix = -1; + + return machine; +} + +/* Implements INIT_EXPANDERS. We just set up to call the above + function. */ + +void +arc_init_expanders (void) +{ + init_machine_status = arc_init_machine_status; +} + +/* Check if OP is a proper parallel of a millicode call pattern. OFFSET + indicates a number of elements to ignore - that allows to have a + sibcall pattern that starts with (return). LOAD_P is zero for store + multiple (for prologues), and one for load multiples (for epilogues), + and two for load multiples where no final clobber of blink is required. + We also skip the first load / store element since this is supposed to + be checked in the instruction pattern. */ + +int +arc_check_millicode (rtx op, int offset, int load_p) +{ + int len = XVECLEN (op, 0) - offset; + int i; + + if (load_p == 2) + { + if (len < 2 || len > 13) + return 0; + load_p = 1; + } + else + { + rtx elt = XVECEXP (op, 0, --len); + + if (GET_CODE (elt) != CLOBBER + || !REG_P (XEXP (elt, 0)) + || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM + || len < 3 || len > 13) + return 0; + } + for (i = 1; i < len; i++) + { + rtx elt = XVECEXP (op, 0, i + offset); + rtx reg, mem, addr; + + if (GET_CODE (elt) != SET) + return 0; + mem = XEXP (elt, load_p); + reg = XEXP (elt, 1-load_p); + if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem)) + return 0; + addr = XEXP (mem, 0); + if (GET_CODE (addr) != PLUS + || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0)) + || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4) + return 0; + } + return 1; +} + +/* Accessor functions for cfun->machine->unalign. */ + +int +arc_get_unalign (void) +{ + return cfun->machine->unalign; +} + +void +arc_clear_unalign (void) +{ + if (cfun) + cfun->machine->unalign = 0; +} + +void +arc_toggle_unalign (void) +{ + cfun->machine->unalign ^= 2; +} + +/* Operands 0..2 are the operands of a addsi which uses a 12 bit + constant in operand 2, but which would require a LIMM because of + operand mismatch. + operands 3 and 4 are new SET_SRCs for operands 0. */ + +void +split_addsi (rtx *operands) +{ + int val = INTVAL (operands[2]); + + /* Try for two short insns first. Lengths being equal, we prefer + expansions with shorter register lifetimes. 
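For example (editor's note, not part of the original comment): splitting r0 := r1 + 200 when r0 is a compact (Rcq) register yields

    operands[3] = (const_int 200)              first insn:   r0 = 200
    operands[4] = (plus (reg r0) (reg r1))     second insn:  r0 = r0 + r1

whereas the fallback ordering below is r0 = r1 followed by r0 = r0 + 200.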
*/ + if (val > 127 && val <= 255 + && satisfies_constraint_Rcq (operands[0])) + { + operands[3] = operands[2]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + } + else + { + operands[3] = operands[1]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]); + } +} + +/* Operands 0..2 are the operands of a subsi which uses a 12 bit + constant in operand 1, but which would require a LIMM because of + operand mismatch. + operands 3 and 4 are new SET_SRCs for operands 0. */ + +void +split_subsi (rtx *operands) +{ + int val = INTVAL (operands[1]); + + /* Try for two short insns first. Lengths being equal, we prefer + expansions with shorter register lifetimes. */ + if (satisfies_constraint_Rcq (operands[0]) + && satisfies_constraint_Rcq (operands[2])) + { + if (val >= -31 && val <= 127) + { + operands[3] = gen_rtx_NEG (SImode, operands[2]); + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + return; + } + else if (val >= 0 && val < 255) + { + operands[3] = operands[1]; + operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]); + return; + } + } + /* If the destination is not an ARCompact16 register, we might + still have a chance to make a short insn if the source is; + we need to start with a reg-reg move for this. */ + operands[3] = operands[2]; + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]); +} + +/* Handle DOUBLE_REGS uses. + Operand 0: destination register + Operand 1: source register */ + +static rtx +arc_process_double_reg_moves (rtx *operands) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + enum usesDxState { none, srcDx, destDx, maxDx }; + enum usesDxState state = none; + + if (refers_to_regno_p (40, 44, src, 0)) + state = srcDx; + if (refers_to_regno_p (40, 44, dest, 0)) + { + /* Via arc_register_move_cost, we should never see D,D moves. */ + gcc_assert (state == none); + state = destDx; + } + + if (state == none) + return NULL_RTX; + + start_sequence (); + + if (state == srcDx) + { + /* Without the LR insn, we need to split this into a + sequence of insns which will use the DEXCLx and DADDHxy + insns to be able to read the Dx register in question. */ + if (TARGET_DPFP_DISABLE_LRSR) + { + /* gen *movdf_insn_nolrsr */ + rtx set = gen_rtx_SET (VOIDmode, dest, src); + rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1))); + } + else + { + /* When we have 'mov D, r' or 'mov D, D' then get the target + register pair for use with LR insn. */ + rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4); + rtx destLow = simplify_gen_subreg(SImode, dest, DFmode, 0); + + /* Produce the two LR insns to get the high and low parts. */ + emit_insn (gen_rtx_SET (VOIDmode, + destHigh, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR_HIGH))); + emit_insn (gen_rtx_SET (VOIDmode, + destLow, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR))); + } + } + else if (state == destDx) + { + /* When we have 'mov r, D' or 'mov D, D' and we have access to the + LR insn get the target register pair. */ + rtx srcHigh = simplify_gen_subreg(SImode, src, DFmode, 4); + rtx srcLow = simplify_gen_subreg(SImode, src, DFmode, 0); + + emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode, + gen_rtvec (3, dest, srcHigh, srcLow), + VUNSPEC_DEXCL_NORES)); + + } + else + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + return val; +} + +/* operands 0..1 are the operands of a 64 bit move instruction. 
+ split it into two moves with operands 2/3 and 4/5. */ + +rtx +arc_split_move (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + int i; + int swap = 0; + rtx xop[4]; + rtx val; + + if (TARGET_DPFP) + { + val = arc_process_double_reg_moves (operands); + if (val) + return val; + } + + for (i = 0; i < 2; i++) + { + if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) + { + rtx addr = XEXP (operands[i], 0); + rtx r, o; + enum rtx_code code; + + gcc_assert (!reg_overlap_mentioned_p (operands[0], addr)); + switch (GET_CODE (addr)) + { + case PRE_DEC: o = GEN_INT (-8); goto pre_modify; + case PRE_INC: o = GEN_INT (8); goto pre_modify; + case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1); + pre_modify: + code = PRE_MODIFY; + break; + case POST_DEC: o = GEN_INT (-8); goto post_modify; + case POST_INC: o = GEN_INT (8); goto post_modify; + case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1); + post_modify: + code = POST_MODIFY; + swap = 2; + break; + default: + gcc_unreachable (); + } + r = XEXP (addr, 0); + xop[0+i] = adjust_automodify_address_nv + (operands[i], SImode, + gen_rtx_fmt_ee (code, Pmode, r, + gen_rtx_PLUS (Pmode, r, o)), + 0); + xop[2+i] = adjust_automodify_address_nv + (operands[i], SImode, plus_constant (Pmode, r, 4), 4); + } + else + { + xop[0+i] = operand_subword (operands[i], 0, 0, mode); + xop[2+i] = operand_subword (operands[i], 1, 0, mode); + } + } + if (reg_overlap_mentioned_p (xop[0], xop[3])) + { + swap = 2; + gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1])); + } + operands[2+swap] = xop[0]; + operands[3+swap] = xop[1]; + operands[4-swap] = xop[2]; + operands[5-swap] = xop[3]; + + start_sequence (); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3])); + emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[5])); + val = get_insns (); + end_sequence (); + + return val; +} + +/* Select between the instruction output templates s_tmpl (for short INSNs) + and l_tmpl (for long INSNs). */ + +const char * +arc_short_long (rtx insn, const char *s_tmpl, const char *l_tmpl) +{ + int is_short = arc_verify_short (insn, cfun->machine->unalign, -1); + + extract_constrain_insn_cached (insn); + return is_short ? s_tmpl : l_tmpl; +} + +/* Searches X for any reference to REGNO, returning the rtx of the + reference found if any. Otherwise, returns NULL_RTX. */ + +rtx +arc_regno_use_in (unsigned int regno, rtx x) +{ + const char *fmt; + int i, j; + rtx tem; + + if (REG_P (x) && refers_to_regno_p (regno, regno+1, x, (rtx *) 0)) + return x; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if ((tem = regno_use_in (regno, XEXP (x, i)))) + return tem; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if ((tem = regno_use_in (regno , XVECEXP (x, i, j)))) + return tem; + } + + return NULL_RTX; +} + +/* Return the integer value of the "type" attribute for INSN, or -1 if + INSN can't have attributes. */ + +int +arc_attr_type (rtx insn) +{ + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return -1; + return get_attr_type (insn); +} + +/* Return true if insn sets the condition codes. 
*/ + +bool +arc_sets_cc_p (rtx insn) +{ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, XVECLEN (PATTERN (insn), 0) - 1); + return arc_attr_type (insn) == TYPE_COMPARE; +} + +/* Return true if INSN is an instruction with a delay slot we may want + to fill. */ + +bool +arc_need_delay (rtx insn) +{ + rtx next; + + if (!flag_delayed_branch) + return false; + /* The return at the end of a function needs a delay slot. */ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE + && (!(next = next_active_insn (insn)) + || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE) + && arc_attr_type (next) == TYPE_RETURN)) + && (!TARGET_PAD_RETURN + || (prev_active_insn (insn) + && prev_active_insn (prev_active_insn (insn)) + && prev_active_insn (prev_active_insn (prev_active_insn (insn)))))) + return true; + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == SEQUENCE) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return false; + return num_delay_slots (insn) != 0; +} + +/* Return true if the scheduling pass(es) has/have already run, + i.e. where possible, we should try to mitigate high latencies + by different instruction selection. */ + +bool +arc_scheduling_not_expected (void) +{ + return cfun->machine->arc_reorg_started; +} + +/* Oddly enough, sometimes we get a zero overhead loop that branch + shortening doesn't think is a loop - observed with compile/pr24883.c + -O3 -fomit-frame-pointer -funroll-loops. Make sure to include the + alignment visible for branch shortening (we actually align the loop + insn before it, but that is equivalent since the loop insn is 4 byte + long.) */ + +int +arc_label_align (rtx label) +{ + int loop_align = LOOP_ALIGN (LABEL); + + if (loop_align > align_labels_log) + { + rtx prev = prev_nonnote_insn (label); + + if (prev && NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == PARALLEL + && recog_memoized (prev) == CODE_FOR_doloop_begin_i) + return loop_align; + } + /* Code has a minimum p2 alignment of 1, which we must restore after an + ADDR_DIFF_VEC. */ + if (align_labels_log < 1) + { + rtx next = next_nonnote_nondebug_insn (label); + if (INSN_P (next) && recog_memoized (next) >= 0) + return 1; + } + return align_labels_log; +} + +/* Return true if LABEL is in executable code. */ + +bool +arc_text_label (rtx label) +{ + rtx next; + + /* ??? We use deleted labels like they were still there, see + gcc.c-torture/compile/20000326-2.c . */ + gcc_assert (GET_CODE (label) == CODE_LABEL + || (GET_CODE (label) == NOTE + && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL)); + next = next_nonnote_insn (label); + if (next) + return (!JUMP_TABLE_DATA_P (next) + || GET_CODE (PATTERN (next)) != ADDR_VEC); + else if (!PREV_INSN (label)) + /* ??? sometimes text labels get inserted very late, see + gcc.dg/torture/stackalign/comp-goto-1.c */ + return true; + return false; +} + +/* Return the size of the pretend args for DECL. */ + +int +arc_decl_pretend_args (tree decl) +{ + /* struct function is in DECL_STRUCT_FUNCTION (decl), but no + pretend_args there... See PR38391. 
*/ + gcc_assert (decl == current_function_decl); + return crtl->args.pretend_args_size; +} + +/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble + when compiling with -O2 -freorder-blocks-and-partition -fprofile-use + -D_PROFILE_USE; delay branch scheduling then follows a REG_CROSSING_JUMP + to redirect two breqs. */ + +static bool +arc_can_follow_jump (const_rtx follower, const_rtx followee) +{ + /* ??? get_attr_type is declared to take an rtx. */ + union { const_rtx c; rtx r; } u; + + u.c = follower; + if (find_reg_note (followee, REG_CROSSING_JUMP, NULL_RTX)) + switch (get_attr_type (u.r)) + { + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + return false; + default: + return true; + } + return true; +} + +/* Implement EPILOGUE__USES. + Return true if REGNO should be added to the deemed uses of the epilogue. + + We use the return address + arc_return_address_regs[arc_compute_function_type (cfun)] . + But also, we have to make sure all the register restore instructions + are known to be live in interrupt functions. */ + +bool +arc_epilogue_uses (int regno) +{ + if (reload_completed) + { + if (ARC_INTERRUPT_P (cfun->machine->fn_type)) + { + if (!fixed_regs[regno]) + return true; + return regno == arc_return_address_regs[cfun->machine->fn_type]; + } + else + return regno == RETURN_ADDR_REGNUM; + } + else + return regno == arc_return_address_regs[arc_compute_function_type (cfun)]; +} + +#ifndef TARGET_NO_LRA +#define TARGET_NO_LRA !TARGET_LRA +#endif + +static bool +arc_lra_p (void) +{ + return !TARGET_NO_LRA; +} + +/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use + Rcq registers, because some insn are shorter with them. OTOH we already + have separate alternatives for this purpose, and other insns don't + mind, so maybe we should rather prefer the other registers? + We need more data, and we can only get that if we allow people to + try all options. */ +static int +arc_register_priority (int r) +{ + switch (arc_lra_priority_tag) + { + case ARC_LRA_PRIORITY_NONE: + return 0; + case ARC_LRA_PRIORITY_NONCOMPACT: + return ((((r & 7) ^ 4) - 4) & 15) != r; + case ARC_LRA_PRIORITY_COMPACT: + return ((((r & 7) ^ 4) - 4) & 15) == r; + default: + gcc_unreachable (); + } +} + +static reg_class_t +arc_spill_class (reg_class_t /* orig_class */, enum machine_mode) +{ + return GENERAL_REGS; +} + +bool +arc_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum, + int itype) +{ + rtx x = *p; + enum reload_type type = (enum reload_type) itype; + + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true) + || (REG_P (XEXP (x, 0)) + && reg_equiv_constant (REGNO (XEXP (x, 0)))))) + { + int scale = GET_MODE_SIZE (mode); + int shift; + rtx index_rtx = XEXP (x, 1); + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; + rtx reg, sum, sum2; + + if (scale > 4) + scale = 4; + if ((scale-1) & offset) + scale = 1; + shift = scale >> 1; + offset_base = (offset + (256 << shift)) & (-512 << shift); + /* Sometimes the normal form does not suit DImode. We + could avoid that by using smaller ranges, but that + would give less optimized code when SImode is + prevalent. 
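(Editor's note on arc_register_priority above.) The expression ((((r & 7) ^ 4) - 4) & 15) == r is a branch-free membership test for the compact register set {r0-r3, r12-r15}: ARC_LRA_PRIORITY_COMPACT returns 1 exactly for registers in that set, ARC_LRA_PRIORITY_NONCOMPACT exactly for registers outside it. A more literal, hypothetical spelling of the same test:

    static int
    arc_compact_reg_p (int r)
    {
      return (r >= 0 && r <= 3) || (r >= 12 && r <= 15);
    }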
*/ + if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift)) + { + int regno; + + reg = XEXP (x, 0); + regno = REGNO (reg); + sum2 = sum = plus_constant (Pmode, reg, offset_base); + + if (reg_equiv_constant (regno)) + { + sum2 = plus_constant (Pmode, reg_equiv_constant (regno), + offset_base); + if (GET_CODE (sum2) == PLUS) + sum2 = gen_rtx_CONST (Pmode, sum2); + } + *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base)); + push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, + type); + return true; + } + } + /* We must re-recognize what we created before. */ + else if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* Because this address is so complex, we know it must have + been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, + it is already unshared, and needs no further unsharing. */ + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); + return true; + } + return false; +} + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-arc.h" diff --git a/gcc-4.9/gcc/config/arc/arc.h b/gcc-4.9/gcc/config/arc/arc.h new file mode 100644 index 000000000..8c7350f3e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.h @@ -0,0 +1,1696 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + + Sources derived from work done by Sankhya Technologies (www.sankhya.com) on + behalf of Synopsys Inc. + + Position Independent Code support added,Code cleaned up, + Comments and Support For ARC700 instructions added by + Saurabh Verma (saurabh.verma@codito.com) + Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_ARC_H +#define GCC_ARC_H + +/* Things to do: + + - incscc, decscc? + +*/ + +#define SYMBOL_FLAG_SHORT_CALL (SYMBOL_FLAG_MACH_DEP << 0) +#define SYMBOL_FLAG_MEDIUM_CALL (SYMBOL_FLAG_MACH_DEP << 1) +#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 2) + +/* Check if this symbol has a long_call attribute in its declaration */ +#define SYMBOL_REF_LONG_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) + +/* Check if this symbol has a medium_call attribute in its declaration */ +#define SYMBOL_REF_MEDIUM_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_MEDIUM_CALL) != 0) + +/* Check if this symbol has a short_call attribute in its declaration */ +#define SYMBOL_REF_SHORT_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_SHORT_CALL) != 0) + +#undef ASM_SPEC +#undef LINK_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE +#undef ASM_APP_ON +#undef ASM_APP_OFF +#undef CC1_SPEC + +/* Names to predefine in the preprocessor for this target machine. 
*/ +#define TARGET_CPU_CPP_BUILTINS() \ + do { \ + builtin_define ("__arc__"); \ + if (TARGET_A5) \ + builtin_define ("__A5__"); \ + else if (TARGET_ARC600) \ + { \ + builtin_define ("__A6__"); \ + builtin_define ("__ARC600__"); \ + } \ + else if (TARGET_ARC601) \ + { \ + builtin_define ("__ARC601__"); \ + } \ + else if (TARGET_ARC700) \ + { \ + builtin_define ("__A7__"); \ + builtin_define ("__ARC700__"); \ + } \ + if (TARGET_NORM) \ + { \ + builtin_define ("__ARC_NORM__");\ + builtin_define ("__Xnorm"); \ + } \ + if (TARGET_MUL64_SET) \ + builtin_define ("__ARC_MUL64__");\ + if (TARGET_MULMAC_32BY16_SET) \ + builtin_define ("__ARC_MUL32BY16__");\ + if (TARGET_SIMD_SET) \ + builtin_define ("__ARC_SIMD__"); \ + if (TARGET_BARREL_SHIFTER) \ + builtin_define ("__Xbarrel_shifter");\ + builtin_assert ("cpu=arc"); \ + builtin_assert ("machine=arc"); \ + builtin_define (TARGET_BIG_ENDIAN \ + ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("__big_endian__"); \ +} while(0) + +#if DEFAULT_LIBC == LIBC_UCLIBC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS (); \ + } \ + while (0) +#endif + +/* Match the macros used in the assembler. */ +#define CPP_SPEC "\ +%{msimd:-D__Xsimd} %{mno-mpy:-D__Xno_mpy} %{mswap:-D__Xswap} \ +%{mmin-max:-D__Xmin_max} %{mEA:-D__Xea} \ +%{mspfp*:-D__Xspfp} %{mdpfp*:-D__Xdpfp} \ +%{mmac-d16:-D__Xxmac_d16} %{mmac-24:-D__Xxmac_24} \ +%{mdsp-packa:-D__Xdsp_packa} %{mcrc:-D__Xcrc} %{mdvbf:-D__Xdvbf} \ +%{mtelephony:-D__Xtelephony} %{mxy:-D__Xxy} %{mmul64: -D__Xmult32} \ +%{mlock:-D__Xlock} %{mswape:-D__Xswape} %{mrtsc:-D__Xrtsc} \ +" + +#define CC1_SPEC "\ +%{EB:%{EL:%emay not use both -EB and -EL}} \ +%{EB:-mbig-endian} %{EL:-mlittle-endian} \ +" + +#define ASM_DEFAULT "-mARC700 -mEA" + +#define ASM_SPEC "\ +%{mbig-endian|EB:-EB} %{EL} \ +%{mcpu=A5|mcpu=a5|mA5:-mA5} \ +%{mcpu=ARC600:-mARC600} \ +%{mcpu=ARC601:-mARC601} \ +%{mcpu=ARC700:-mARC700} \ +%{mcpu=ARC700:-mEA} \ +%{!mcpu=*:" ASM_DEFAULT "} \ +%{mbarrel-shifter} %{mno-mpy} %{mmul64} %{mmul32x16:-mdsp-packa} %{mnorm} \ +%{mswap} %{mEA} %{mmin-max} %{mspfp*} %{mdpfp*} \ +%{msimd} \ +%{mmac-d16} %{mmac-24} %{mdsp-packa} %{mcrc} %{mdvbf} %{mtelephony} %{mxy} \ +%{mcpu=ARC700|!mcpu=*:%{mlock}} \ +%{mcpu=ARC700|!mcpu=*:%{mswape}} \ +%{mcpu=ARC700|!mcpu=*:%{mrtsc}} \ +" + +#if DEFAULT_LIBC == LIBC_UCLIBC +/* Note that the default is to link against dynamic libraries, if they are + available. Override with -static. */ +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic}\ + -dynamic-linker /lib/ld-uClibc.so.0 \ + -X %{mbig-endian:-EB} \ + %{EB} %{EL} \ + %{marclinux*} \ + %{!marclinux*: %{pg|p|profile:-marclinux_prof;: -marclinux}} \ + %{!z:-z max-page-size=0x2000 -z common-page-size=0x2000} \ + %{shared:-shared}" +/* Like the standard LINK_COMMAND_SPEC, but add %G when building + a shared library with -nostdlib, so that the hidden functions of libgcc + will be incorporated. + N.B., we don't want a plain -lgcc, as this would lead to re-exporting + non-hidden functions, so we have to consider libgcc_s.so.* first, which in + turn should be wrapped with --as-needed. 
*/ +#define LINK_COMMAND_SPEC "\ +%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ + %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ + %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ + %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ + %(mflib)\ + %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\ + %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ + %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}}}}}" + +#else +#define LINK_SPEC "%{mbig-endian:-EB} %{EB} %{EL}\ + %{pg|p:-marcelf_prof;mA7|mARC700|mcpu=arc700|mcpu=ARC700: -marcelf}" +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti%O%s %{pg|p:crtg.o%s} crtbegin.o%s" +#else +#define STARTFILE_SPEC "%{!shared:%{!mkernel:crt1.o%s}} crti.o%s \ + %{!shared:%{pg|p|profile:crtg.o%s} crtbegin.o%s} %{shared:crtbeginS.o%s}" + +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define ENDFILE_SPEC "%{pg|p:crtgend.o%s} crtend.o%s crtn%O%s" +#else +#define ENDFILE_SPEC "%{!shared:%{pg|p|profile:crtgend.o%s} crtend.o%s} \ + %{shared:crtendS.o%s} crtn.o%s" + +#endif + +#if DEFAULT_LIBC == LIBC_UCLIBC +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{pg|p|profile:-lgmon -u profil --defsym __profil=profil} -lc}" +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack +#else +#undef LIB_SPEC +/* -lc_p not present for arc-elf32-* : ashwin */ +#define LIB_SPEC "%{!shared:%{g*:-lg} %{pg|p:-lgmon} -lc}" +#endif + +#ifndef DRIVER_ENDIAN_SELF_SPECS +#define DRIVER_ENDIAN_SELF_SPECS "" +#endif +#ifndef TARGET_SDATA_DEFAULT +#define TARGET_SDATA_DEFAULT 1 +#endif +#ifndef TARGET_MMEDIUM_CALLS_DEFAULT +#define TARGET_MMEDIUM_CALLS_DEFAULT 0 +#endif + +#define DRIVER_SELF_SPECS DRIVER_ENDIAN_SELF_SPECS \ + "%{mARC5|mA5: -mcpu=A5 %= ARC_FIRST_SIMD_VR_REG && REGNO <= ARC_LAST_SIMD_VR_REG) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ +extern unsigned int arc_hard_regno_mode_ok[]; +extern unsigned int arc_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ +((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +/* Tie QI/HI/SI modes together. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ +(GET_MODE_CLASS (MODE1) == MODE_INT \ + && GET_MODE_CLASS (MODE2) == MODE_INT \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD) + +/* Internal macros to classify a register number as to whether it's a + general purpose register for compact insns (r0-r3,r12-r15), or + stack pointer (r28). */ + +#define COMPACT_GP_REG_P(REGNO) \ + (((signed)(REGNO) >= 0 && (REGNO) <= 3) || ((REGNO) >= 12 && (REGNO) <= 15)) +#define SP_REG_P(REGNO) ((REGNO) == 28) + + + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. 
+ If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class +{ + NO_REGS, + R0_REGS, /* 'x' */ + GP_REG, /* 'Rgp' */ + FP_REG, /* 'f' */ + SP_REGS, /* 'b' */ + LPCOUNT_REG, /* 'l' */ + LINK_REGS, /* 'k' */ + DOUBLE_REGS, /* D0, D1 */ + SIMD_VR_REGS, /* VR00-VR63 */ + SIMD_DMA_CONFIG_REGS, /* DI0-DI7,DO0-DO7 */ + ARCOMPACT16_REGS, /* 'q' */ + AC16_BASE_REGS, /* 'e' */ + SIBCALL_REGS, /* "Rsc" */ + GENERAL_REGS, /* 'r' */ + MPY_WRITABLE_CORE_REGS, /* 'W' */ + WRITABLE_CORE_REGS, /* 'w' */ + CHEAP_CORE_REGS, /* 'c' */ + ALL_CORE_REGS, /* 'Rac' */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "GP_REG", \ + "FP_REG", \ + "SP_REGS", \ + "LPCOUNT_REG", \ + "LINK_REGS", \ + "DOUBLE_REGS", \ + "SIMD_VR_REGS", \ + "SIMD_DMA_CONFIG_REGS", \ + "ARCOMPACT16_REGS", \ + "AC16_BASE_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "MPY_WRITABLE_CORE_REGS", \ + "WRITABLE_CORE_REGS", \ + "CHEAP_CORE_REGS", \ + "ALL_CORE_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* No Registers */ \ + {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'x', r0 register , r0 */ \ + {0x04000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rgp', Global Pointer, r26 */ \ + {0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'f', Frame Pointer, r27 */ \ + {0x10000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'b', Stack Pointer, r28 */ \ + {0x00000000, 0x10000000, 0x00000000, 0x00000000, 0x00000000}, /* 'l', LPCOUNT Register, r60 */ \ + {0xe0000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'k', LINK Registers, r29-r31 */ \ + {0x00000000, 0x00000f00, 0x00000000, 0x00000000, 0x00000000}, /* 'D', D1, D2 Registers */ \ + {0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000}, /* 'V', VR00-VR63 Registers */ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff}, /* 'V', DI0-7,DO0-7 Registers */ \ + {0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \ + {0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \ + {0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \ + {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \ + {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \ + /* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. 
As these \ + registers are fixed, it does not affect the literal meaning of the \ + constraints, but it makes it a superset of GENERAL_REGS, thus \ + enabling some operations that would otherwise not be possible. */ \ + {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff} /* All Registers */ \ +} + +/* Local macros to mark the first and last regs of different classes. */ +#define ARC_FIRST_SIMD_VR_REG 64 +#define ARC_LAST_SIMD_VR_REG 127 + +#define ARC_FIRST_SIMD_DMA_CONFIG_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_IN_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG 136 +#define ARC_LAST_SIMD_DMA_CONFIG_REG 143 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern enum reg_class arc_regno_reg_class[]; + +#define REGNO_REG_CLASS(REGNO) (arc_regno_reg_class[REGNO]) + +/* The class value for valid index registers. An index register is + one used in an address where its value is either multiplied by + a scale factor or added to another register (as well as added to a + displacement). */ + +#define INDEX_REG_CLASS (TARGET_MIXED_CODE ? ARCOMPACT16_REGS : GENERAL_REGS) + +/* The class value for valid base registers. A base register is one used in + an address which is the register value plus a displacement. */ + +#define BASE_REG_CLASS (TARGET_MIXED_CODE ? AC16_BASE_REGS : GENERAL_REGS) + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 29 || ((REGNO) == ARG_POINTER_REGNUM) || ((REGNO) == 63) ||\ + (unsigned) reg_renumber[REGNO] < 29) + +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + arc_preferred_reload_class((X), (CLASS)) + + extern enum reg_class arc_preferred_reload_class (rtx, enum reg_class); + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ +(( GET_MODE_SIZE (MODE) == 16 && CLASS == SIMD_VR_REGS) ? 1: \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200) +#define SMALL_INT_RANGE(X, OFFSET, SHIFT) \ + ((unsigned) (((X) >> (SHIFT)) + 0x100) \ + < 0x200 - ((unsigned) (OFFSET) >> (SHIFT))) +#define SIGNED_INT12(X) ((unsigned) ((X) + 0x800) < 0x1000) +#define LARGE_INT(X) \ +(((X) < 0) \ + ? 
(X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \ + : (unsigned HOST_WIDE_INT) (X) <= (unsigned HOST_WIDE_INT) 0xffffffff) +#define UNSIGNED_INT3(X) ((unsigned) (X) < 0x8) +#define UNSIGNED_INT5(X) ((unsigned) (X) < 0x20) +#define UNSIGNED_INT6(X) ((unsigned) (X) < 0x40) +#define UNSIGNED_INT7(X) ((unsigned) (X) < 0x80) +#define UNSIGNED_INT8(X) ((unsigned) (X) < 0x100) +#define IS_ONE(X) ((X) == 1) +#define IS_ZERO(X) ((X) == 0) + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET (0) + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (0) + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. + Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + If you don't define this macro, the default is to return the value + of FRAMEADDR--that is, the stack frame address is also the address + of the stack word that points to the previous frame. */ +/* ??? unfinished */ +/*define DYNAMIC_CHAIN_ADDRESS (FRAMEADDR)*/ + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame + pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' + is defined. */ +/* The current return address is in r31. The return address of anything + farther back is at [%fp,4]. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ +arc_return_addr_rtx(COUNT,FRAME) + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 28 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 27 + +/* Base register for access to arguments of the function. This register + will be eliminated into either fp or sp. */ +#define ARG_POINTER_REGNUM 62 + +#define RETURN_ADDR_REGNUM 31 + +/* TODO - check usage of STATIC_CHAIN_REGNUM with a testcase */ +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM 11 + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. 
This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_ARC_PARM_REGS 8 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_ARC_PARM_REGS) + +/* The ROUND_ADVANCE* macros are local to this file. */ +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ +(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ +((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +#define ARC_FUNCTION_ARG_BOUNDARY(MODE,TYPE) PARM_BOUNDARY +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +/* N.B. Vectors have alignment exceeding BIGGEST_ALIGNMENT. + ARC_FUNCTION_ARG_BOUNDARY reduces this to no more than 32 bit. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ + ((((CUM) - 1) | (ARC_FUNCTION_ARG_BOUNDARY ((MODE), (TYPE)) - 1)/BITS_PER_WORD)\ + + 1) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). + When passing arguments NAMED is always 1. When receiving arguments NAMED + is 1 for each argument except the last in a stdarg/varargs function. In + a stdarg function we want to treat the last named arg as named. In a + varargs function we want to treat the last named arg (which is + `__builtin_va_alist') as unnamed. + This macro is only used in this file. */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ +((CUM) < MAX_ARC_PARM_REGS) + + +/* Function results. */ + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ +/* ??? What about r1 in DI/DF values. */ +#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0) + +/* Tell GCC to use RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register in which address to store a structure value + is passed to a function, or 0 to use `invisible' first argument. */ +#define STRUCT_VALUE 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 0 + +#define EPILOGUE_USES(REGNO) arc_epilogue_uses ((REGNO)) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARC. 
First, the + argument pointer register can always be eliminated in favor of the stack + pointer register or frame pointer register. Secondly, the frame pointer + register can often be eliminated in favor of the stack pointer register. +*/ + +#define ELIMINABLE_REGS \ +{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +extern int arc_initial_elimination_offset(int from, int to); +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = arc_initial_elimination_offset ((FROM), (TO)) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. + We actually emit the profiler code at the call site, so leave this one + empty. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ + if (TARGET_UCB_MCOUNT) \ + fprintf (FILE, "\t%s\n", arc_output_libcall ("__mcount")) + +#define NO_PROFILE_COUNTERS 1 + +/* Trampolines. */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 20 + +/* Alignment required for a trampoline in bits . */ +/* For actual data alignment we just need 32, no more than the stack; + however, to reduce cache coherency issues, we want to make sure that + trampoline instructions always appear the same in any given cache line. */ +#define TRAMPOLINE_ALIGNMENT 256 + +/* Library calls. */ + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +/* The `ld' insn allows 2, but the `st' insn only allows 1. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* We have pre inc/dec (load/store with update). */ +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_REG 1 +#define HAVE_POST_MODIFY_REG 1 +/* ??? should also do PRE_MODIFY_REG / POST_MODIFY_REG, but that requires + a special predicate for the memory operand of stores, like for the SH. */ + +/* Recognize any constant value that is a valid address. */ +#define CONSTANT_ADDRESS_P(X) \ +(flag_pic?arc_legitimate_pic_addr_p (X): \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST)) + +/* Is the argument a const_int rtx, containing an exact power of 2 */ +#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The *_NONSTRICT definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. 
*/ +#define REG_OK_FOR_INDEX_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P_STRICT(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P_STRICT(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. */ +/* The `ld' insn allows [reg],[reg+shimm],[reg+limm],[reg+reg],[limm] + but the `st' insn only allows [reg],[reg+shimm],[limm]. + The only thing we can do is only allow the most strict case `st' and hope + other parts optimize out the restrictions for `ld'. */ + +#define RTX_OK_FOR_BASE_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_BASE_P_STRICT (X) : REG_OK_FOR_BASE_P_NONSTRICT (X))) + +#define RTX_OK_FOR_INDEX_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_INDEX_P_STRICT (X) : REG_OK_FOR_INDEX_P_NONSTRICT (X))) + +/* A C compound statement that attempts to replace X, which is an address + that needs reloading, with a valid memory address for an operand of + mode MODE. WIN is a C statement label elsewhere in the code. + + We try to get a normal form + of the address. That will allow inheritance of the address reloads. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ + do { \ + if (arc_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE))) \ + goto WIN; \ + } while (0) + +/* Reading lp_count for anything but the lp instruction is very slow on the + ARC700. */ +#define DONT_REALLOC(REGNO,MODE) \ + (TARGET_ARC700 && (REGNO) == 60) + + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ +/*extern enum machine_mode arc_select_cc_mode ();*/ +#define SELECT_CC_MODE(OP, X, Y) \ +arc_select_cc_mode (OP, X, Y) + +/* Return non-zero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ +#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/ + +/* Costs. */ + +/* Compute extra cost of moving data between one register class + and another. */ +#define REGISTER_MOVE_COST(MODE, CLASS, TO_CLASS) \ + arc_register_move_cost ((MODE), (CLASS), (TO_CLASS)) + +/* Compute the cost of moving data between registers and memory. */ +/* Memory is 3 times as expensive as registers. + ??? Is that the right way to look at it? */ +#define MEMORY_MOVE_COST(MODE,CLASS,IN) \ +(GET_MODE_SIZE (MODE) <= UNITS_PER_WORD ? 6 : 12) + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +/* Scc sets the destination to 1 and then conditionally zeroes it. + Best case, ORed SCCs can be made into clear - condset - condset. + But it could also end up as five insns. So say it costs four on + average. 
+ These extra instructions - and the second comparison - will also be + an extra cost if the first comparison would have been decisive. + So get an average saving, with a probability of the first branch + beging decisive of p0, we want: + p0 * (branch_cost - 4) > (1 - p0) * 5 + ??? We don't get to see that probability to evaluate, so we can + only wildly guess that it might be 50%. + ??? The compiler also lacks the notion of branch predictability. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ + (BRANCH_COST (optimize_function_for_speed_p (cfun), \ + false) > 9) + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 0 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the ARC, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarfout.c. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata" +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss" + +/* Expression whose value is a string, including spacing, containing the + assembler operation to identify the following data as initialization/termination + code. If not defined, GCC will assume such a section does not exist. */ +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic || CASE_VECTOR_PC_RELATIVE) + +/* For DWARF. Marginally different than default so output is "prettier" + (and consistent with above). */ +#define PUSHSECTION_FORMAT "\t%s %s\n" + +/* Tell crtstuff.c we're using ELF. */ +#define OBJECT_FORMAT_ELF + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM 26 + +/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is + clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM + is not defined. */ +/* This register is call-saved on the ARC. */ +/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/ + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent code. + You can assume that X satisfies CONSTANT_P, so you need not + check this. You can also assume `flag_pic' is true, so you need not + check it either. You need not define this macro if all constants + (including SYMBOL_REF) can be immediate operands when generating + position independent code. 
*/ +#define LEGITIMATE_PIC_OPERAND_P(X) (arc_legitimate_pic_operand_p(X)) + +/* PIC and small data don't mix on ARC because they use the same register. */ +#define SDATA_BASE_REGNUM 26 + +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 \ + : DW_EH_PE_absptr) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +/* Gas needs this to be "#" in order to recognize line directives. */ +#define ASM_COMMENT_START "#" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* This is how to output an assembler line defining a `char' constant. */ +#define ASM_OUTPUT_CHAR(FILE, VALUE) \ +( fprintf (FILE, "\t.byte\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining a `short' constant. */ +#define ASM_OUTPUT_SHORT(FILE, VALUE) \ +( fprintf (FILE, "\t.hword\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining an `int' constant. + We also handle symbol output here. Code addresses must be right shifted + by 2 because that's how the jump instruction wants them. */ +#define ASM_OUTPUT_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\t.word\t"); \ + if (GET_CODE (VALUE) == LABEL_REF) \ + { \ + fprintf (FILE, "%%st(@"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, ")"); \ + } \ + else \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `float' constant. */ +#define ASM_OUTPUT_FLOAT(FILE, VALUE) \ +{ \ + long t; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_SINGLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n", \ + t, ASM_COMMENT_START, str); \ +} + +/* This is how to output an assembler line defining a `double' constant. */ +#define ASM_OUTPUT_DOUBLE(FILE, VALUE) \ +{ \ + long t[2]; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_DOUBLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n\t.word\t0x%lx\n", \ + t[0], ASM_COMMENT_START, str, t[1]); \ +} + +/* This is how to output the definition of a user-level label named NAME, + such as the label on a static function or variable NAME. */ +#define ASM_OUTPUT_LABEL(FILE, NAME) \ +do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_NAME_P(NAME) ( NAME[0]=='*') + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +/* We work around a dwarfout.c deficiency by watching for labels from it and + not adding the '_' prefix. There is a comment in + dwarfout.c that says it should be using ASM_OUTPUT_INTERNAL_LABEL. */ +#define ASM_OUTPUT_LABELREF(FILE, NAME1) \ +do { \ + const char *NAME; \ + NAME = (*targetm.strip_name_encoding)(NAME1); \ + if ((NAME)[0] == '.' 
&& (NAME)[1] == 'L') \ + fprintf (FILE, "%s", NAME); \ + else \ + { \ + if (!ASM_NAME_P (NAME1)) \ + fprintf (FILE, "%s", user_label_prefix); \ + fprintf (FILE, "%s", NAME); \ + } \ +} while (0) + +/* This is how to output a reference to a symbol_ref / label_ref as + (part of) an operand. To disambiguate from register names like + a1 / a2 / status etc, symbols are preceded by '@'. */ +#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \ + ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0)) +#define ASM_OUTPUT_LABEL_REF(FILE,STR) \ + do \ + { \ + fputc ('@', file); \ + assemble_name ((FILE), (STR)); \ + } \ + while (0) + +/* Store in OUTPUT a string (made with alloca) containing + an assembler-name for a local static variable named NAME. + LABELNO is an integer which is different for each call. */ +#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \ +( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \ + sprintf ((OUTPUT), "%s.%d", (NAME), (LABELNO))) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). */ + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "@%s" + +/* A C string containing the appropriate assembler directive to + specify the size of a symbol, without any arguments. On systems + that use ELF, the default (in `config/elfos.h') is `"\t.size\t"'; + on other systems, the default is not to define this macro. */ +#undef SIZE_ASM_OP +#define SIZE_ASM_OP "\t.size\t" + +/* Assembler pseudo-op to equate one value with another. */ +/* ??? This is needed because dwarfout.c provides a default definition too + late for defaults.h (which contains the default definition of ASM_OTPUT_DEF + that we use). */ +#ifdef SET_ASM_OP +#undef SET_ASM_OP +#endif +#define SET_ASM_OP "\t.set\t" + +extern char rname56[], rname57[], rname58[], rname59[]; +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "gp", "fp", "sp", "ilink1", "ilink2", "blink", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "d1", "d1", "d2", "d2", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + rname56,rname57,rname58,rname59,"lp_count", "cc", "ap", "pcl", \ + "vr0", "vr1", "vr2", "vr3", "vr4", "vr5", "vr6", "vr7", \ + "vr8", "vr9", "vr10", "vr11", "vr12", "vr13", "vr14", "vr15", \ + "vr16", "vr17", "vr18", "vr19", "vr20", "vr21", "vr22", "vr23", \ + "vr24", "vr25", "vr26", "vr27", "vr28", "vr29", "vr30", "vr31", \ + "vr32", "vr33", "vr34", "vr35", "vr36", "vr37", "vr38", "vr39", \ + "vr40", "vr41", "vr42", "vr43", "vr44", "vr45", "vr46", "vr47", \ + "vr48", "vr49", "vr50", "vr51", "vr52", "vr53", "vr54", "vr55", \ + "vr56", "vr57", "vr58", "vr59", "vr60", "vr61", "vr62", "vr63", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "lp_start", "lp_end" \ +} + +/* Entry to the insn conditionalizer. 
*/ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + arc_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char arc_punct_chars[]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ +arc_punct_chars[(unsigned char) (CHAR)] + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +#define PRINT_OPERAND(FILE, X, CODE) \ +arc_print_operand (FILE, X, CODE) + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. + + On some machines, the syntax for a symbolic address depends on + the section that the address refers to. On these machines, + define the macro `ENCODE_SECTION_INFO' to store the information + into the `symbol_ref', and then check for it here. */ +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +arc_print_operand_address (FILE, ADDR) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + fprintf (FILE, "\t.word "); \ + assemble_name (FILE, label); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: fprintf (FILE, "\t.byte "); break; \ + case HImode: fprintf (FILE, "\t.hword "); break; \ + case SImode: fprintf (FILE, "\t.word "); break; \ + default: gcc_unreachable (); \ + } \ + assemble_name (FILE, label); \ + fprintf (FILE, "-"); \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \ + assemble_name (FILE, label); \ + if (TARGET_COMPACT_CASESI) \ + fprintf (FILE, " + %d", 4 + arc_get_unalign ()); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* ADDR_DIFF_VECs are in the text section and thus can affect the + current alignment. */ +#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \ + do \ + { \ + if (GET_CODE (PATTERN (JUMPTABLE)) == ADDR_DIFF_VEC \ + && ((GET_MODE_SIZE (GET_MODE (PATTERN (JUMPTABLE))) \ + * XVECLEN (PATTERN (JUMPTABLE), 1) + 1) \ + & 2)) \ + arc_toggle_unalign (); \ + } \ + while (0) + +#define JUMP_ALIGN(LABEL) (arc_size_opt_level < 2 ? 2 : 0) +#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \ + (JUMP_ALIGN(LABEL) \ + ? JUMP_ALIGN(LABEL) \ + : GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \ + ? 1 : 0) +/* The desired alignment for the location counter at the beginning + of a loop. */ +/* On the ARC, align loops to 4 byte boundaries unless doing all-out size + optimization. */ +#define LOOP_ALIGN JUMP_ALIGN + +#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL)) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ +do { \ + if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \ + if ((LOG) > 1) \ + arc_clear_unalign (); \ +} while (0) + +/* ASM_OUTPUT_ALIGNED_DECL_LOCAL (STREAM, DECL, NAME, SIZE, ALIGNMENT) + Define this macro when you need to see the variable's decl in order to + chose what to output. 
*/ +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \ + arc_asm_output_aligned_decl_local (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0) + +/* To translate the return value of arc_function_type into a register number + to jump through for function return. */ +extern int arc_return_address_regs[4]; + +/* Debugging information. */ + +/* Generate DBX and DWARF debugging information. */ +#ifdef DBX_DEBUGGING_INFO +#undef DBX_DEBUGGING_INFO +#endif +#define DBX_DEBUGGING_INFO + +#ifdef DWARF2_DEBUGGING_INFO +#undef DWARF2_DEBUGGING_INFO +#endif +#define DWARF2_DEBUGGING_INFO + +/* Prefer STABS (for now). */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* How to renumber registers for dbx and gdb. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + ((TARGET_MULMAC_32BY16_SET && (REGNO) >= 56 && (REGNO) <= 57) \ + ? ((REGNO) ^ !TARGET_BIG_ENDIAN) \ + : (TARGET_MUL64_SET && (REGNO) >= 57 && (REGNO) <= 59) \ + ? ((REGNO) == 57 \ + ? 58 /* MMED */ \ + : ((REGNO) & 1) ^ TARGET_BIG_ENDIAN \ + ? 59 /* MHI */ \ + : 57 + !!TARGET_MULMAC_32BY16_SET) /* MLO */ \ + : (REGNO)) + +#define DWARF_FRAME_REGNUM(REG) (REG) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (31) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 31) + +/* Frame info. */ + +/* Define this macro to 0 if your target supports DWARF 2 frame unwind + information, but it does not yet work with exception handling. */ +/* N.B. the below test is valid in an #if, but not in a C expression. */ +#if DEFAULT_LIBC == LIBC_UCLIBC +#define DWARF2_UNWIND_INFO 1 +#else +#define DWARF2_UNWIND_INFO 0 +#endif + +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 4 ? (N) : INVALID_REGNUM) + +/* Turn off splitting of long stabs. */ +#define DBX_CONTIN_LENGTH 0 + +/* Miscellaneous. */ + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. + If we have pc relative case vectors, we start the case vector shortening + with QImode. */ +#define CASE_VECTOR_MODE \ + ((optimize && (CASE_VECTOR_PC_RELATIVE || flag_pic)) ? QImode : Pmode) + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE TARGET_CASE_VECTOR_PC_RELATIVE + +#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \ + CASE_VECTOR_SHORTEN_MODE_1 \ + (MIN_OFFSET, TARGET_COMPACT_CASESI ? MAX_OFFSET + 6 : MAX_OFFSET, BODY) + +#define CASE_VECTOR_SHORTEN_MODE_1(MIN_OFFSET, MAX_OFFSET, BODY) \ +((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \ + : (MIN_OFFSET) >= -128 && (MAX_OFFSET) <= 127 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \ + : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 65535 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, HImode) \ + : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, HImode) \ + : SImode) + +#define ADDR_VEC_ALIGN(VEC_INSN) \ + (exact_log2 (GET_MODE_SIZE (GET_MODE (PATTERN (VEC_INSN))))) +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), ADDR_VEC_ALIGN (TABLE)); + +#define INSN_LENGTH_ALIGNMENT(INSN) \ + ((JUMP_P (INSN) \ + && GET_CODE (PATTERN (INSN)) == ADDR_DIFF_VEC \ + && GET_MODE (PATTERN (INSN)) == QImode) \ + ? 
0 : length_unit_log) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, NIL if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Let the movmem expander handle small block moves. */ +#define MOVE_BY_PIECES_P(LEN, ALIGN) 0 +#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ + < (unsigned int) MOVE_RATIO (!optimize_size)) + +/* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ +#define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3) + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. Changed from 1 to 0 for rotate pattern testcases + (e.g. 20020226-1.c). This change truncates the upper 27 bits of a word + while rotating a word. Came to notice through a combine phase + optimization viz. a << (32-b) is equivalent to a << (-b). +*/ +#define SHIFT_COUNT_TRUNCATED 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* We assume that the store-condition-codes instructions store 0 for false + and some other value for true. This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +/* ARCompact has full 32-bit pointers. */ +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. Note that we can't use "rtx" here + since it hasn't been defined! */ +extern struct rtx_def *arc_compare_op0, *arc_compare_op1; + +/* ARC function types. */ +enum arc_function_type { + ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL, + /* These are interrupt handlers. The name corresponds to the register + name that contains the return address. */ + ARC_FUNCTION_ILINK1, ARC_FUNCTION_ILINK2 +}; +#define ARC_INTERRUPT_P(TYPE) \ +((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2) + +/* Compute the type of a function from its DECL. Needed for EPILOGUE_USES. */ +struct function; +extern enum arc_function_type arc_compute_function_type (struct function *); + +/* Called by crtstuff.c to make calls to function FUNCTION that are defined in + SECTION_OP, and then to switch back to text section. */ +#undef CRT_CALL_STATIC_FUNCTION +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "bl @" USER_LABEL_PREFIX #FUNC "\n" \ + TEXT_SECTION_ASM_OP); + +/* This macro expands to the name of the scratch register r12, used for + temporary calculations according to the ABI. */ +#define ARC_TEMP_SCRATCH_REG "r12" + +/* The C++ compiler must use one bit to indicate whether the function + that will be called through a pointer-to-member-function is + virtual. Normally, we assume that the low-order bit of a function + pointer must always be zero. 
Then, by ensuring that the + vtable_index is odd, we can distinguish which variant of the union + is in use. But, on some platforms function pointers can be odd, + and so this doesn't work. In that case, we use the low-order bit + of the `delta' field, and shift the remainder of the `delta' field + to the left. We needed to do this for A4 because the address was always + shifted and thus could be odd. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION \ + (ptrmemfunc_vbit_in_pfn) + +#define INSN_SETS_ARE_DELAYED(X) \ + (GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && (get_attr_type (X) == TYPE_CALL || get_attr_type (X) == TYPE_SFUNC)) + +#define INSN_REFERENCES_ARE_DELAYED(insn) INSN_SETS_ARE_DELAYED (insn) + +#define CALL_ATTR(X, NAME) \ + ((CALL_P (X) || NONJUMP_INSN_P (X)) \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_##NAME (X) == IS_##NAME##_YES) \ + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CC_FP_GTmode || (MODE) == CC_FP_GEmode \ + || (MODE) == CC_FP_UNEQmode || (MODE) == CC_FP_ORDmode \ + || (MODE) == CC_FPXmode) \ + ? reverse_condition_maybe_unordered ((CODE)) \ + : reverse_condition ((CODE))) + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + ((LENGTH) \ + = (GET_CODE (PATTERN (X)) == SEQUENCE \ + ? ((LENGTH) \ + + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 0), \ + get_attr_length (XVECEXP (PATTERN (X), \ + 0, 0)), \ + true) \ + - get_attr_length (XVECEXP (PATTERN (X), 0, 0)) \ + + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 1), \ + get_attr_length (XVECEXP (PATTERN (X), \ + 0, 1)), \ + true) \ + - get_attr_length (XVECEXP (PATTERN (X), 0, 1))) \ + : arc_adjust_insn_length ((X), (LENGTH), false))) + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C,STR) ((C) == '`') + +#define INIT_EXPANDERS arc_init_expanders () + +#define CFA_FRAME_BASE_OFFSET(FUNDECL) (-arc_decl_pretend_args ((FUNDECL))) + +#define ARG_POINTER_CFA_OFFSET(FNDECL) \ + (FIRST_PARM_OFFSET (FNDECL) + arc_decl_pretend_args ((FNDECL))) + +enum +{ + ARC_LRA_PRIORITY_NONE, ARC_LRA_PRIORITY_NONCOMPACT, ARC_LRA_PRIORITY_COMPACT +}; + +/* The define_cond_exec construct is rather crude, as we can't have + different ones with different conditions apply to different sets + of instructions. We can't use an attribute test inside the condition, + because that would lead to infinite recursion as the attribute test + needs to recognize the insn. So, instead we have a clause for + the pattern condition of all sfunc patterns which is only relevant for + the predicated varaint. */ +#define SFUNC_CHECK_PREDICABLE \ + (GET_CODE (PATTERN (insn)) != COND_EXEC || !flag_pic || !TARGET_MEDIUM_CALLS) + +#endif /* GCC_ARC_H */ diff --git a/gcc-4.9/gcc/config/arc/arc.md b/gcc-4.9/gcc/config/arc/arc.md new file mode 100644 index 000000000..80f6e338a --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.md @@ -0,0 +1,5165 @@ +;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. + +;; Sources derived from work done by Sankhya Technologies (www.sankhya.com) on +;; behalf of Synopsys Inc. 
+ +;; Position Independent Code support added,Code cleaned up, +;; Comments and Support For ARC700 instructions added by +;; Saurabh Verma (saurabh.verma@codito.com) +;; Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) +;; +;; Profiling support and performance improvements by +;; Joern Rennecke (joern.rennecke@embecosm.com) +;; +;; Support for DSP multiply instructions and mul64 +;; instructions for ARC600; and improvements in flag setting +;; instructions by +;; Muhammad Khurram Riaz (Khurram.Riaz@arc.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; dest, src Two operand instruction's syntax +;; dest, src1, src2 Three operand instruction's syntax + +;; ARC and ARCompact PREDICATES: +;; +;; comparison_operator LT, GT, LE, GE, LTU, GTU, LEU, GEU, EQ, NE +;; memory_operand memory [m] +;; immediate_operand immediate constant [IKLMNOP] +;; register_operand register [rq] +;; general_operand register, memory, constant [rqmIKLMNOP] + +;; Note that the predicates are only used when selecting a pattern +;; to determine if an operand is valid. + +;; The constraints then select which of the possible valid operands +;; is present (and guide register selection). The actual assembly +;; instruction is then selected on the basis of the constraints. + +;; ARC and ARCompact CONSTRAINTS: +;; +;; b stack pointer r28 +;; f frame pointer r27 +;; Rgp global pointer r26 +;; g general reg, memory, constant +;; m memory +;; p memory address +;; q registers commonly used in +;; 16-bit insns r0-r3, r12-r15 +;; c core registers r0-r60, ap, pcl +;; r general registers r0-r28, blink, ap, pcl +;; +;; H fp 16-bit constant +;; I signed 12-bit immediate (for ARCompact) +;; K unsigned 3-bit immediate (for ARCompact) +;; L unsigned 6-bit immediate (for ARCompact) +;; M unsinged 5-bit immediate (for ARCompact) +;; O unsinged 7-bit immediate (for ARCompact) +;; P unsinged 8-bit immediate (for ARCompact) +;; N constant '1' (for ARCompact) + + +;; TODO: +;; -> prefetch instruction + +;; ----------------------------------------------------------------------------- + +;; Include DFA scheduluers +(include ("arc600.md")) +(include ("arc700.md")) + +;; Predicates + +(include ("predicates.md")) +(include ("constraints.md")) +;; ----------------------------------------------------------------------------- + +;; UNSPEC Usage: +;; ~~~~~~~~~~~~ +;; ----------------------------------------------------------------------------- +;; Symbolic name Value Desc. 
+;; ----------------------------------------------------------------------------- +;; UNSPEC_PLT 3 symbol to be referenced through the PLT +;; UNSPEC_GOT 4 symbol to be rerenced through the GOT +;; UNSPEC_GOTOFF 5 Local symbol.To be referenced relative to the +;; GOTBASE.(Referenced as @GOTOFF) +;; ---------------------------------------------------------------------------- + + +(define_constants + [(UNSPEC_SWAP 13) ; swap generation through builtins. candidate for scheduling + (UNSPEC_MUL64 14) ; mul64 generation through builtins. candidate for scheduling + (UNSPEC_MULU64 15) ; mulu64 generation through builtins. candidate for scheduling + (UNSPEC_DIVAW 16) ; divaw generation through builtins. candidate for scheduling + (UNSPEC_DIRECT 17) + (UNSPEC_PROF 18) ; profile callgraph counter + (UNSPEC_LP 19) ; to set LP_END + (UNSPEC_CASESI 20) + (VUNSPEC_RTIE 17) ; blockage insn for rtie generation + (VUNSPEC_SYNC 18) ; blockage insn for sync generation + (VUNSPEC_BRK 19) ; blockage insn for brk generation + (VUNSPEC_FLAG 20) ; blockage insn for flag generation + (VUNSPEC_SLEEP 21) ; blockage insn for sleep generation + (VUNSPEC_SWI 22) ; blockage insn for swi generation + (VUNSPEC_CORE_READ 23) ; blockage insn for reading a core register + (VUNSPEC_CORE_WRITE 24) ; blockage insn for writing to a core register + (VUNSPEC_LR 25) ; blockage insn for reading an auxiliary register + (VUNSPEC_SR 26) ; blockage insn for writing to an auxiliary register + (VUNSPEC_TRAP_S 27) ; blockage insn for trap_s generation + (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation + + (R0_REG 0) + (R1_REG 1) + (R2_REG 2) + (R3_REG 3) + (R12_REG 12) + (SP_REG 28) + (ILINK1_REGNUM 29) + (ILINK2_REGNUM 30) + (RETURN_ADDR_REGNUM 31) + (MUL64_OUT_REG 58) + + (VUNSPEC_DEXCL 32) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_DEXCL_NORES 33) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_LR_HIGH 34) ; blockage insn for reading an auxiliary register + + (LP_COUNT 60) + (CC_REG 61) + (LP_START 144) + (LP_END 145) + ] +) + +(define_attr "is_sfunc" "no,yes" (const_string "no")) + +;; Insn type. Used to default other attribute values. +; While the attribute is_sfunc is set for any call of a special function, +; the instruction type sfunc is used only for the special call sequence +; that loads the (pc-relative) function address into r12 and then calls +; via r12. + +(define_attr "type" + "move,load,store,cmove,unary,binary,compare,shift,uncond_branch,jump,branch, + brcc,brcc_no_delay_slot,call,sfunc,call_no_delay_slot, + multi,umulti, two_cycle_core,lr,sr,divaw,loop_setup,loop_end,return, + misc,spfp,dpfp_mult,dpfp_addsub,mulmac_600,cc_arith, + simd_vload, simd_vload128, simd_vstore, simd_vmove, simd_vmove_else_zero, + simd_vmove_with_acc, simd_varith_1cycle, simd_varith_2cycle, + simd_varith_with_acc, simd_vlogic, simd_vlogic_with_acc, + simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc, + simd_valign, simd_valign_with_acc, simd_vcontrol, + simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma" + (cond [(eq_attr "is_sfunc" "yes") + (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call") + (match_test "flag_pic") (const_string "sfunc")] + (const_string "call_no_delay_slot"))] + (const_string "binary"))) + +;; The following three attributes are mixed case so that they can be +;; used conveniently with the CALL_ATTR macro. 
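The mixed-case attribute names called out in the comment above are easiest to see by expanding the CALL_ATTR helper defined earlier in arc.h. The sketch below is an illustrative hand expansion, not part of the checked-in sources; it assumes only standard cpp token pasting and the accessor/enum names genattrtab generates for an attribute spelled is_SIBCALL.

;; Illustrative hand expansion (not from the original sources):
;; CALL_ATTR (insn, SIBCALL) pastes the attribute name into both the
;; accessor call and the enum value, giving roughly
;;
;;   (CALL_P (insn) || NONJUMP_INSN_P (insn))
;;     && GET_CODE (PATTERN (insn)) != USE
;;     && GET_CODE (PATTERN (insn)) != CLOBBER
;;     && get_attr_is_SIBCALL (insn) == IS_SIBCALL_YES
;;
;; so the attributes must literally be spelled is_CALL, is_SIBCALL and
;; is_NON_SIBCALL for the pasted names to match what genattrtab emits.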
+(define_attr "is_CALL" "no,yes" + (cond [(eq_attr "is_sfunc" "yes") (const_string "yes") + (eq_attr "type" "call,call_no_delay_slot") (const_string "yes")] + (const_string "no"))) + +(define_attr "is_SIBCALL" "no,yes" (const_string "no")) + +(define_attr "is_NON_SIBCALL" "no,yes" + (cond [(eq_attr "is_SIBCALL" "yes") (const_string "no") + (eq_attr "is_CALL" "yes") (const_string "yes")] + (const_string "no"))) + + +;; Attribute describing the processor +(define_attr "cpu" "none,A5,ARC600,ARC700" + (const (symbol_ref "arc_cpu_attr"))) + +;; true for compact instructions (those with _s suffix) +;; "maybe" means compact unless we conditionalize the insn. +(define_attr "iscompact" "true,maybe,true_limm,maybe_limm,false" + (cond [(eq_attr "type" "sfunc") + (const_string "maybe")] + (const_string "false"))) + + +; Is there an instruction that we are actually putting into the delay slot? +(define_attr "delay_slot_filled" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no") + (match_test "!TARGET_AT_DBR_CONDEXEC + && JUMP_P (insn) + && INSN_ANNULLED_BRANCH_P (insn) + && !INSN_FROM_TARGET_P (NEXT_INSN (insn))") + (const_string "no")] + (const_string "yes"))) + +; Is a delay slot present for purposes of shorten_branches? +; We have to take the length of this insn into account for forward branches +; even if we don't put the insn actually into a delay slot. +(define_attr "delay_slot_present" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no")] + (const_string "yes"))) + +; We can't use get_attr_length (NEXT_INSN (insn)) because this gives the +; length of a different insn with the same uid. +(define_attr "delay_slot_length" "" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_int 0)] + (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn))) + - get_attr_length (insn)"))) + + +(define_attr "enabled" "no,yes" (const_string "yes")) + +(define_attr "predicable" "no,yes" (const_string "no")) +;; if 'predicable' were not so brain-dead, we would specify: +;; (cond [(eq_attr "cond" "!canuse") (const_string "no") +;; (eq_attr "iscompact" "maybe") (const_string "no")] +;; (const_string "yes")) +;; and then for everything but calls, we could just set the cond attribute. + +;; Condition codes: this one is used by final_prescan_insn to speed up +;; conditionalizing instructions. It saves having to scan the rtl to see if +;; it uses or alters the condition codes. + +;; USE: This insn uses the condition codes (eg: a conditional branch). +;; CANUSE: This insn can use the condition codes (for conditional execution). +;; SET: All condition codes are set by this insn. +;; SET_ZN: the Z and N flags are set by this insn. +;; SET_ZNC: the Z, N, and C flags are set by this insn. +;; CLOB: The condition codes are set to unknown values by this insn. +;; NOCOND: This insn can't use and doesn't affect the condition codes. 
+ +(define_attr "cond" "use,canuse,canuse_limm,canuse_limm_add,set,set_zn,clob,nocond" + (cond + [(and (eq_attr "predicable" "yes") + (eq_attr "is_sfunc" "no") + (eq_attr "delay_slot_filled" "no")) + (const_string "canuse") + + (eq_attr "type" "call") + (cond [(eq_attr "delay_slot_filled" "yes") (const_string "nocond") + (match_test "!flag_pic") (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "iscompact" "maybe,false") + (cond [ (and (eq_attr "type" "move") + (match_operand 1 "immediate_operand" "")) + (if_then_else + (ior (match_operand 1 "u6_immediate_operand" "") + (match_operand 1 "long_immediate_operand" "")) + (const_string "canuse") + (const_string "canuse_limm")) + + (eq_attr "type" "binary") + (cond [(ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (const_string "nocond") + (match_operand 2 "register_operand" "") + (const_string "canuse") + (match_operand 2 "u6_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "long_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "const_int_operand" "") + (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + (eq_attr "is_sfunc" "yes") + (cond [(match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET + && flag_pic)") + (const_string "canuse_limm_add") + (match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET)") + (const_string "canuse_limm")] + (const_string "canuse")) + + ] + + (const_string "nocond"))] + + (cond [(eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + ] + + (const_string "nocond")))) + +/* ??? Having all these patterns gives ifcvt more freedom to generate + inefficient code. It seem to operate on the premise that + register-register copies and registers are free. I see better code + with -fno-if-convert now than without. */ +(define_cond_exec + [(match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])] + "true" + "") + +;; Length (in # of bytes, long immediate constants counted too). +;; ??? There's a nasty interaction between the conditional execution fsm +;; and insn lengths: insns with shimm values cannot be conditionally executed. 
+(define_attr "length" "" + (cond + [(eq_attr "iscompact" "true,maybe") + (cond + [(eq_attr "type" "sfunc") + (cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") + (const_int 12)] + (const_int 10)) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)] + (const_int 2)) + + (eq_attr "iscompact" "true_limm,maybe_limm") + (const_int 6) + + (eq_attr "type" "load") + (if_then_else + (match_operand 1 "long_immediate_loadstore_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "store") + (if_then_else + (ior (match_operand 0 "long_immediate_loadstore_operand" "") + (match_operand 1 "immediate_operand" "")) + (const_int 8) (const_int 4)) + + (eq_attr "type" "move,unary") + (cond + [(match_operand 1 "u6_immediate_operand" "") (const_int 4) + (match_operand 1 "register_operand" "") (const_int 4) + (match_operand 1 "long_immediate_operand" "") (const_int 8) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 8)] + (const_int 4)) + + (and (eq_attr "type" "shift") + (match_operand 1 "immediate_operand")) + (const_int 8) + (eq_attr "type" "binary,shift") + (if_then_else + (ior (match_operand 2 "long_immediate_operand" "") + (and (ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (eq (match_operand 2 "u6_immediate_operand" "") + (const_int 0)))) + + (const_int 8) (const_int 4)) + + (eq_attr "type" "cmove") + (if_then_else (match_operand 1 "register_operand" "") + (const_int 4) (const_int 8)) + + (eq_attr "type" "call_no_delay_slot") (const_int 8) + ] + + (const_int 4)) +) + +;; The length here is the length of a single asm. Unfortunately it might be +;; 4 or 8 so we must allow for 8. That's ok though. How often will users +;; lament asm's not being put in delay slots? +;; +(define_asm_attributes + [(set_attr "length" "8") + (set_attr "type" "multi") + (set_attr "cond" "clob") ]) + +;; Delay slots. +;; The first two cond clauses and the default are necessary for correctness; +;; the remaining cond clause is mainly an optimization, as otherwise nops +;; would be inserted; however, if we didn't do this optimization, we would +;; have to be more conservative in our length calculations. + +(define_attr "in_delay_slot" "false,true" + (cond [(eq_attr "type" "uncond_branch,jump,branch, + call,sfunc,call_no_delay_slot, + brcc, brcc_no_delay_slot,loop_setup,loop_end") + (const_string "false") + (match_test "arc_write_ext_corereg (insn)") + (const_string "false") + (gt (symbol_ref "arc_hazard (prev_active_insn (insn), + next_active_insn (insn))") + (symbol_ref "(arc_hazard (prev_active_insn (insn), insn) + + arc_hazard (insn, next_active_insn (insn)))")) + (const_string "false") + (eq_attr "iscompact" "maybe") (const_string "true") + ] + + (if_then_else (eq_attr "length" "2,4") + (const_string "true") + (const_string "false")))) + +; must not put an insn inside that refers to blink. +(define_attr "in_call_delay_slot" "false,true" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (RETURN_ADDR_REGNUM, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +(define_attr "in_sfunc_delay_slot" "false,true" + (cond [(eq_attr "in_call_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (12, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +;; Instructions that we can put into a delay slot and conditionalize. 
+(define_attr "cond_delay_insn" "no,yes" + (cond [(eq_attr "cond" "!canuse") (const_string "no") + (eq_attr "type" "call,branch,uncond_branch,jump,brcc") + (const_string "no") + (eq_attr "length" "2,4") (const_string "yes")] + (const_string "no"))) + +(define_attr "in_ret_delay_slot" "no,yes" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "no") + (match_test "regno_clobbered_p + (arc_return_address_regs + [arc_compute_function_type (cfun)], + insn, SImode, 1)") + (const_string "no")] + (const_string "yes"))) + +(define_attr "cond_ret_delay_insn" "no,yes" + (cond [(eq_attr "in_ret_delay_slot" "no") (const_string "no") + (eq_attr "cond_delay_insn" "no") (const_string "no")] + (const_string "yes"))) + +(define_attr "annul_ret_delay_insn" "no,yes" + (cond [(eq_attr "cond_ret_delay_insn" "yes") (const_string "yes") + (match_test "TARGET_AT_DBR_CONDEXEC") (const_string "no") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (const_string "yes")] + (const_string "no"))) + + +;; Delay slot definition for ARCompact ISA +;; ??? FIXME: +;; When outputting an annul-true insn elegible for cond-exec +;; in a cbranch delay slot, unless optimizing for size, we use cond-exec +;; for ARC600; we could also use this for ARC700 if the branch can't be +;; unaligned and is at least somewhat likely (add parameter for this). + +(define_delay (eq_attr "type" "call") + [(eq_attr "in_call_delay_slot" "true") + (eq_attr "in_call_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (nil) + (nil)]) + +(define_delay + (eq_attr "type" "return") + [(eq_attr "in_ret_delay_slot" "yes") + (eq_attr "annul_ret_delay_insn" "yes") + (eq_attr "cond_ret_delay_insn" "yes")]) + +;; For ARC600, unexposing the delay sloy incurs a penalty also in the +;; non-taken case, so the only meaningful way to have an annull-true +;; filled delay slot is to conditionalize the delay slot insn. +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "cond_delay_insn" "yes") + (eq_attr "cond_delay_insn" "yes")]) + +;; For ARC700, anything goes for annulled-true insns, since there is no +;; penalty for the unexposed delay slot when the branch is not taken, +;; however, we must avoid things that have a delay slot themselvese to +;; avoid confusing gcc. +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (eq_attr "cond_delay_insn" "yes")]) + +;; -mlongcall -fpic sfuncs use r12 to load the function address +(define_delay (eq_attr "type" "sfunc") + [(eq_attr "in_sfunc_delay_slot" "true") + (eq_attr "in_sfunc_delay_slot" "true") + (nil)]) +;; ??? need to use a working strategy for canuse_limm: +;; - either canuse_limm is not eligible for delay slots, and has no +;; delay slots, or arc_reorg has to treat them as nocond, or it has to +;; somehow modify them to become inelegible for delay slots if a decision +;; is made that makes conditional execution required. 
+ +(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac" + (const + (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (const_string "arc600") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (const_string "arc700_4_2_std") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (const_string "arc700_4_2_xmac")] + (const_string "none")))) + +(define_attr "tune_arc700" "false,true" + (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") + (const_string "true") + (const_string "false"))) + +;; Move instructions. +(define_expand "movqi" + [(set (match_operand:QI 0 "move_dest_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, QImode)) DONE;") + +; In order to allow the ccfsm machinery to do its work, the leading compact +; alternatives say 'canuse' - there is another alternative that will match +; when the condition codes are used. +; Rcq won't match if the condition is actually used; to avoid a spurious match +; via q, q is inactivated as constraint there. +; Likewise, the length of an alternative that might be shifted to conditional +; execution must reflect this, lest out-of-range branches are created. +; The iscompact attribute allows the epilogue expander to know for which +; insns it should lengthen the return insn. +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w, w,Rcq,S,!*x,r,m,???m") + (match_operand:QI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac,?i,T,Rcq,Usd,m,c,?Rac"))] + "register_operand (operands[0], QImode) + || register_operand (operands[1], QImode)" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1 + ldb%? %0,%1%& + stb%? %1,%0%& + ldb%? %0,%1%& + ldb%U1%V1 %0,%1 + stb%U0%V0 %1,%0 + stb%U0%V0 %1,%0" + [(set_attr "type" "move,move,move,move,move,move,move,load,store,load,load,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,true,true,true,false,false,false") + (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,no,no,no,no,no,no")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "move_dest_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, HImode)) DONE;") + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w,Rcq#q,w,Rcq,S,r,m,???m,VUsc") + (match_operand:HI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac, ?i,?i,T,Rcq,m,c,?Rac,i"))] + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode) + || (CONSTANT_P (operands[1]) + /* Don't use a LIMM that we could load with a single insn - we loose + delay-slot filling opportunities. */ + && !satisfies_constraint_I (operands[1]) + && satisfies_constraint_Usc (operands[0]))" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1%& + mov%? %0,%S1 + ldw%? %0,%1%& + stw%? 
%1,%0%& + ldw%U1%V1 %0,%1 + stw%U0%V0 %1,%0 + stw%U0%V0 %1,%0 + stw%U0%V0 %S1,%0" + [(set_attr "type" "move,move,move,move,move,move,move,move,load,store,load,store,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,maybe_limm,false,true,true,false,false,false,false") + (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,yes,no,no,no,no,no,no")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "move_dest_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, SImode)) DONE;") + +; In order to allow the ccfsm machinery to do its work, the leading compact +; alternatives say 'canuse' - there is another alternative that will match +; when the condition codes are used. +; Rcq won't match if the condition is actually used; to avoid a spurious match +; via q, q is inactivated as constraint there. +; Likewise, the length of an alternative that might be shifted to conditional +; execution must reflect this, lest out-of-range branches are created. +; The iscompact attribute allows the epilogue expander to know for which +; insns it should lengthen the return insn. +; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc . +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w, w,???w, ?w, w,Rcq#q, w,Rcq, S,Us<,RcqRck,!*x,r,m,???m,VUsc") + (match_operand:SI 1 "move_src_operand" " cL,cP,Rcq#q,cL,I,Crr,?Rac,Cpc,Clb,?Cal,?Cal,T,Rcq,RcqRck,Us>,Usd,m,c,?Rac,C32"))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode) + || (CONSTANT_P (operands[1]) + /* Don't use a LIMM that we could load with a single insn - we lose + delay-slot filling opportunities. */ + && !satisfies_constraint_I (operands[1]) + && satisfies_constraint_Usc (operands[0]))" + "@ + mov%? %0,%1%& ;0 + mov%? %0,%1%& ;1 + mov%? %0,%1%& ;2 + mov%? %0,%1 ;3 + mov%? %0,%1 ;4 + ror %0,((%1*2+1) & 0x3f) ;5 + mov%? %0,%1 ;6 + add %0,%S1 ;7 + * return arc_get_unalign () ? \"add %0,pcl,%1-.+2\" : \"add %0,pcl,%1-.\"; + mov%? %0,%S1%& ;9 + mov%? %0,%S1 ;10 + ld%? %0,%1%& ;11 + st%? %1,%0%& ;12 + * return arc_short_long (insn, \"push%? %1%&\", \"st%U0 %1,%0%&\"); + * return arc_short_long (insn, \"pop%? %0%&\", \"ld%U1 %0,%1%&\"); + ld%? %0,%1%& ;15 + ld%U1%V1 %0,%1 ;16 + st%U0%V0 %1,%0 ;17 + st%U0%V0 %1,%0 ;18 + st%U0%V0 %S1,%0 ;19" + [(set_attr "type" "move,move,move,move,move,two_cycle_core,move,binary,binary,move,move,load,store,store,load,load,load,store,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,maybe_limm,false,true,true,true,true,true,false,false,false,false") + ; Use default length for iscompact to allow for COND_EXEC. But set length + ; of Crr to 4. + (set_attr "length" "*,*,*,4,4,4,4,8,8,*,8,*,*,*,*,*,*,*,*,8") + (set_attr "predicable" "yes,no,yes,yes,no,no,yes,no,no,yes,yes,no,no,no,no,no,no,no,no,no")]) + +;; Sometimes generated by the epilogue code. We don't want to +;; recognize these addresses in general, because the limm is costly, +;; and we can't use them for stores. +(define_insn "*movsi_pre_mod" + [(set (match_operand:SI 0 "register_operand" "=w") + (mem:SI (pre_modify + (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand 1 "immediate_operand" "Cal")))))] + "reload_completed" + "ld.a %0,[sp,%1]" + [(set_attr "type" "load") + (set_attr "length" "8")]) + +;; Store a value directly to memory. The location might also be cached.
+;; Since the cached copy can cause a write-back at unpredictable times, +;; we first write cached, then we write uncached. +(define_insn "store_direct" + [(set (match_operand:SI 0 "move_dest_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "c")] + UNSPEC_DIRECT))] + "" + "st%U0 %1,%0\;st%U0.di %1,%0" + [(set_attr "type" "store")]) + +(define_insn_and_split "*movsi_set_cc_insn" + [(set (match_operand:CC_ZN 2 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operand:SI 1 "nonmemory_operand" "cI,cL,Cal") (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 1))] + "" + "mov%?.f %0,%S1" + ; splitting to 'tst' allows short insns and combination into brcc. + "reload_completed && operands_match_p (operands[0], operands[1])" + [(set (match_dup 2) (match_dup 3))] + "" + [(set_attr "type" "compare") + (set_attr "predicable" "no,yes,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +(define_insn "unary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operator:SI 2 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)]))] + "" + "%O2.f 0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn")]) + + +; this pattern is needed by combiner for cases like if (c=(~b)) { ... } +(define_insn "*unary_comparison_result_used" + [(set (match_operand 2 "cc_register" "") + (match_operator 4 "zn_compare_operator" + [(match_operator:SI 3 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w") + (match_dup 3))] + "" + "%O3.f %0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4")]) + +(define_insn "*tst" + [(set + (match_operand 0 "cc_register" "") + (match_operator 3 "zn_compare_operator" + [(and:SI + (match_operand:SI 1 "register_operand" + "%Rcq,Rcq, c, c, c, c, c, c") + (match_operand:SI 2 "nonmemory_operand" + " Rcq,C0p,cI,cL,C1p,Ccp,CnL,Cal")) + (const_int 0)]))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* + switch (which_alternative) + { + case 0: case 2: case 3: case 7: + return \"tst%? %1,%2\"; + case 1: + return \"btst%? 
%1,%z2\"; + case 4: + return \"bmsk%?.f 0,%1,%Z2%&\"; + case 5: + return \"bclr%?.f 0,%1,%M2%&\"; + case 6: + return \"bic%?.f 0,%1,%n2-1\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false") + (set_attr "type" "compare") + (set_attr "length" "*,*,4,4,4,4,4,8") + (set_attr "predicable" "no,yes,no,yes,no,no,no,yes") + (set_attr "cond" "set_zn")]) + +(define_insn "*commutative_binary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%c,c,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (clobber (match_scratch:SI 3 "=X,1,X"))] + "" + "%O4.f 0,%1,%2" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +; for flag setting 'add' instructions like if (a+b) { ...} +; the combiner needs this pattern +(define_insn "*addsi_compare" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "c") + (neg:SI (match_operand:SI 1 "register_operand" "c"))))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; for flag setting 'add' instructions like if (a+b < a) { ...} +; the combiner needs this pattern +(define_insn "addsi_compare_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c,c") + (match_operand:SI 1 "nonmemory_operand" "cL,Cal")) + (match_dup 0)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,8")]) + +(define_insn "*addsi_compare_3" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c") + (match_operand:SI 1 "register_operand" "c")) + (match_dup 1)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; this pattern is needed by combiner for cases like if (c=a+b) { ... } +(define_insn "*commutative_binary_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + ; We can accept any commutative operator except mult because + ; our 'w' class below could try to use LP_COUNT. + [(match_operator:SI 4 "commutative_operator_sans_mult" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 4))] + "" + "%O4.f %0,%1,%2 ; non-mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; a MULT-specific version of this pattern to avoid touching the +; LP_COUNT register +(define_insn "*commutative_binary_mult_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "mult_operator" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + ; Make sure to use the W class to not touch LP_COUNT. 
+ (set (match_operand:SI 0 "register_operand" "=W,W,W") + (match_dup 4))] + "TARGET_ARC700" + "%O4.f %0,%1,%2 ; mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; this pattern is needed by combiner for cases like if (c=a<= 0 + && INTVAL (operands[2]) <= 65535) + { + emit_insn (gen_umul_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_move_insn (operands[0], gen_acc2 ()); + DONE; + } + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umul_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_insn (gen_mac_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_move_insn (operands[0], gen_acc2 ()); + DONE; + } + else + { + emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]); + emit_insn (gen_mulsi3_600_lib ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, R0_REG)); + DONE; + } +}) + +; mululw conditional execution without a LIMM clobbers an input register; +; we'd need a different pattern to describe this. +; To make the conditional execution valid for the LIMM alternative, we +; have to emit the LIMM before the register operand. +(define_insn "umul_600" + [(set (match_operand:SI 2 "acc2_operand" "") + (mult:SI (match_operand:SI 0 "register_operand" "c,c,c") + (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + (clobber (match_operand:SI 3 "acc1_operand" ""))] + "TARGET_MULMAC_32BY16_SET" + "@mululw 0, %0, %1 + mululw 0, %0, %1 + mululw%? 0, %1, %0" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600, mulmac_600, mulmac_600") + (set_attr "predicable" "no, no, yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + +(define_insn "mac_600" + [(set (match_operand:SI 2 "acc2_operand" "") + (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "c,c,c") + (ashift:SI + (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" "c,L,Cal") + (const_int 16) + (const_int 16)) + (const_int 16))) + (match_dup 2))) + (clobber (match_operand:SI 3 "acc1_operand" ""))] + "TARGET_MULMAC_32BY16_SET" + "machlw%? 0, %0, %1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600, mulmac_600, mulmac_600") + (set_attr "predicable" "no, no, yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + +(define_insn "mulsi_600" + [(set (match_operand:SI 2 "mlo_operand" "") + (mult:SI (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c") + (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal"))) + (clobber (match_operand:SI 3 "mhi_operand" ""))] + "TARGET_MUL64_SET" +; The assembler mis-assembles mul64 / mulu64 with "I" constraint constants, +; using a machine code pattern that only allows "L" constraint constants. +; "mul64%? \t0, %0, %1%&" +{ + if (satisfies_constraint_I (operands[1]) + && !satisfies_constraint_L (operands[1])) + { + /* MUL64 <0,>b,s12 00101bbb10000100 0BBBssssssSSSSSS */ + int n = true_regnum (operands[0]); + int i = INTVAL (operands[1]); + asm_fprintf (asm_out_file, "\t.short %d`", 0x2884 + ((n & 7) << 8)); + asm_fprintf (asm_out_file, "\t.short %d`", + ((i & 0x3f) << 6) + ((i >> 6) & 0x3f) + ((n & 070) << 9)); + return "; mul64%? \t0, %0, %1%&"; + } + return "mul64%? 
\t0, %0, %1%&"; +} + [(set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,false,false,false") + (set_attr "type" "multi,multi,multi,multi") + (set_attr "predicable" "yes,yes,no,yes") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse")]) + +; If we compile without an mul option enabled, but link with libraries +; for a mul option, we'll see clobbers of multiplier output registers. +; There is also an implementation using norm that clobbers the loop registers. +(define_insn "mulsi3_600_lib" + [(set (reg:SI R0_REG) + (mult:SI (reg:SI R0_REG) (reg:SI R1_REG))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__mulsi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "mulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (sign_extend:DI + (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "Rcq#q,cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "Rcq#q,cL,L,C32"))))] + "TARGET_MUL64_SET" + "mul64%? \t0, %0, %1%&" + [(set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,false,false,false") + (set_attr "type" "multi,multi,multi,multi") + (set_attr "predicable" "yes,yes,no,yes") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse")]) + +(define_insn "umulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (zero_extend:DI + (match_operand:SI 0 "register_operand" "c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "cL,L,C32"))))] + "TARGET_MUL64_SET" + "mulu64%? \t0, %0, %1%&" + [(set_attr "length" "4,4,8") + (set_attr "iscompact" "false") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "canuse,canuse_limm,canuse")]) + +; ARC700 mpy* instructions: This is a multi-cycle extension, and thus 'w' +; may not be used as destination constraint. + +; The result of mpy and mpyu is the same except for flag setting (if enabled), +; but mpyu is faster for the standard multiplier. +; Note: we must make sure LP_COUNT is not one of the destination +; registers, since it cannot be the destination of a multi-cycle insn +; like MPY or MPYU. +(define_insn "mulsi3_700" + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r") + (mult:SI (match_operand:SI 1 "register_operand" " 0,c,0,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))] +"TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyu%? 
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (sign_extend:DI(match_operand:SI 1 "register_operand" "")) + (sign_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "(TARGET_ARC700 && !TARGET_NOMPY_SET) + || TARGET_MUL64_SET + || TARGET_MULMAC_32BY16_SET" +" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_highpart(SImode, operands[0]); + rtx result_low = gen_lowpart(SImode, operands[0]); + + emit_insn (gen_mul64_600 (operands[1], operands[2])); + emit_insn (gen_mac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } +}") + +(define_insn "mul64_600" + [(set (reg:DI 56) + (mult:DI (sign_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "mullw%? 0, %0, %1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; ??? check if this is canonical rtl +(define_insn "mac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (sign_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (sign_extend:DI (match_dup 1)) + (ashift:DI + (sign_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machlw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; DI <- DI(signed SI) * DI(signed SI) +(define_insn_and_split "mulsidi3_700" + [(set (match_operand:DI 0 "register_operand" "=&r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "&& reload_completed" + [(const_int 0)] +{ + int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD; + int lo = TARGET_BIG_ENDIAN ? 
UNITS_PER_WORD : 0; + rtx l0 = simplify_gen_subreg (word_mode, operands[0], DImode, lo); + rtx h0 = simplify_gen_subreg (word_mode, operands[0], DImode, hi); + emit_insn (gen_mulsi3_highpart (h0, operands[1], operands[2])); + emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2])); + DONE; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyh%? %0,%1,%2" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse,nocond")]) + +; Note that mpyhu has the same latency as mpy / mpyh, +; thus we use the type multi. +(define_insn "*umulsi3_highpart_i" + [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyhu%? %0,%1,%2" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse,nocond")]) + +; Implementations include additional labels for umulsidi3, so we got all +; the same clobbers - plus one for the result low part. */ +(define_insn "umulsi3_highpart_600_lib_le" + [(set (reg:SI R1_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG))) + (const_int 32)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R0_REG)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "!TARGET_BIG_ENDIAN + && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsi3_highpart\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "umulsi3_highpart_600_lib_be" + [(set (reg:SI R0_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG))) + (const_int 32)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R1_REG)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "TARGET_BIG_ENDIAN + && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsi3_highpart\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +;; (zero_extend:DI (const_int)) leads to internal errors in combine, so we +;; need a separate pattern for immediates +;; ??? This is fine for combine, but not for reload. +(define_insn "umulsi3_highpart_int" + [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c")) + (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal")) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyhu%? 
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "umulsi3_highpart" + [(set (match_operand:SI 0 "general_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" ""))) + (const_int 32))))] + "TARGET_ARC700 || (!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET)" + " +{ + rtx target = operands[0]; + + if (!TARGET_ARC700 || TARGET_NOMPY_SET) + { + emit_move_insn (gen_rtx_REG (SImode, 0), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 1), operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_umulsi3_highpart_600_lib_be ()); + else + emit_insn (gen_umulsi3_highpart_600_lib_le ()); + emit_move_insn (target, gen_rtx_REG (SImode, 0)); + DONE; + } + + if (!register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + operands[2] = simplify_const_unary_operation (ZERO_EXTEND, DImode, + operands[2], SImode); + else if (!immediate_operand (operands[2], SImode)) + operands[2] = gen_rtx_ZERO_EXTEND (DImode, operands[2]); + emit_insn (gen_umulsi3_highpart_int (target, operands[1], operands[2])); + if (target != operands[0]) + emit_move_insn (operands[0], target); + DONE; +}") + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (zero_extend:DI(match_operand:SI 1 "register_operand" "")) + (zero_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + emit_insn (gen_umulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_reg_rtx (SImode); + rtx result_low = gen_reg_rtx (SImode); + + result_hi = gen_highpart(SImode , operands[0]); + result_low = gen_lowpart(SImode , operands[0]); + + emit_insn (gen_umul64_600 (operands[1], operands[2])); + emit_insn (gen_umac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } + else + { + emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]); + emit_insn (gen_umulsidi3_600_lib ()); + emit_move_insn (operands[0], gen_rtx_REG (DImode, R0_REG)); + DONE; + } +}) + +(define_insn "umul64_600" + [(set (reg:DI 56) + (mult:DI (zero_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "@mululw 0, %0, %1 + mululw 0, %0, %1 + mululw%? 
0, %1, %0" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +(define_insn "umac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (zero_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (zero_extend:DI (match_dup 1)) + (ashift:DI + (zero_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machulw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + + +;; DI <- DI(unsigned SI) * DI(unsigned SI) +(define_insn_and_split "umulsidi3_700" + [(set (match_operand:DI 0 "dest_reg_operand" "=&r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + emit_insn (gen_umulsi3_highpart (h0, operands[1], operands[2])); + emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2])); + DONE; +} + [(set_attr "type" "umulti") + (set_attr "length" "8")]) + +(define_insn "umulsidi3_600_lib" + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsidi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(parallel + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))])] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && peep2_regno_dead_p (1, TARGET_BIG_ENDIAN ? 
R1_REG : R0_REG)" + [(pc)] +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_umulsi3_highpart_600_lib_be ()); + else + emit_insn (gen_umulsi3_highpart_600_lib_le ()); + DONE; +}) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false)) + { + operands[2]=force_reg(SImode, operands[2]); + } + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[2], Pmode)) + { + operands[2] = force_reg (SImode, arc_rewrite_small_data (operands[2])); + } + + ") + +(define_expand "adddi3" + [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (TARGET_EXPAND_ADDDI) + { + rtx l0 = gen_lowpart (SImode, operands[0]); + rtx h0 = disi_highpart (operands[0]); + rtx l1 = gen_lowpart (SImode, operands[1]); + rtx h1 = disi_highpart (operands[1]); + rtx l2 = gen_lowpart (SImode, operands[2]); + rtx h2 = disi_highpart (operands[2]); + rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG); + + if (CONST_INT_P (h2) && INTVAL (h2) < 0 && SIGNED_INT12 (INTVAL (h2))) + { + emit_insn (gen_sub_f (l0, l1, gen_int_mode (-INTVAL (l2), SImode))); + emit_insn (gen_sbc (h0, h1, + gen_int_mode (-INTVAL (h2) - (l1 != 0), SImode), + cc_c)); + DONE; + } + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn (gen_adc (h0, h1, h2)); + DONE; + } +}) + +; This assumes that there can be no strictly partial overlap between +; operands[1] and operands[2]. +(define_insn_and_split "*adddi3_i" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w") + (plus:DI (match_operand:DI 1 "register_operand" "%c,0,c") + (match_operand:DI 2 "nonmemory_operand" "ci,ci,!i"))) + (clobber (reg:CC CC_REG))] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + rtx l1 = operand_subword (operands[1], lo, 0, DImode); + rtx h1 = operand_subword (operands[1], hi, 0, DImode); + rtx l2 = operand_subword (operands[2], lo, 0, DImode); + rtx h2 = operand_subword (operands[2], hi, 0, DImode); + + + if (l2 == const0_rtx) + { + if (!rtx_equal_p (l0, l1) && !rtx_equal_p (l0, h1)) + emit_move_insn (l0, l1); + emit_insn (gen_addsi3 (h0, h1, h2)); + if (!rtx_equal_p (l0, l1) && rtx_equal_p (l0, h1)) + emit_move_insn (l0, l1); + DONE; + } + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) >= -0x7fffffff) + { + emit_insn (gen_subdi3_i (operands[0], operands[1], + GEN_INT (-INTVAL (operands[2])))); + DONE; + } + if (rtx_equal_p (l0, h1)) + { + if (h2 != const0_rtx) + emit_insn (gen_addsi3 (h0, h1, h2)); + else if (!rtx_equal_p (h0, h1)) + emit_move_insn (h0, h1); + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), + gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, 1)))); + DONE; + } + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn (gen_adc (h0, h1, h2)); + DONE; +} + [(set_attr "cond" "clob") + (set_attr "type" "binary") + (set_attr "length" "16,16,20")]) + +(define_insn "add_f" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + 
(match_dup 1))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +(define_insn "*add_f_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + (match_dup 2))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +; w/c/c comes first (rather than w/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional +; execution is used where possible. +(define_insn_and_split "adc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (plus:SI (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_operand:SI 1 "nonmemory_operand" + "%c,0,c,0,cCal")) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + adc %0,%1,%2 + add.cs %0,%1,1 + adc %0,%1,%2 + adc %0,%1,%2 + adc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 3)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))))] + "operands[3] = simplify_gen_binary (PLUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +; combiner-splitter cmp / scc -> cmp / adc +(define_split + [(set (match_operand:SI 0 "dest_reg_operand" "") + (gtu:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (clobber (reg CC_REG))] + "" + [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) + (set (match_dup 0) (ltu:SI (reg:CC_C CC_REG) (const_int 0)))]) + +; combine won't work when an intermediate result is used later... +; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (reg:CC_C CC_REG) + (compare:CC_C (match_dup 0) + (match_operand:SI 3 "nonmemory_operand" "")))] + "rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3])" + [(parallel + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1))) + (set (match_dup 0) + (plus:SI (match_dup 1) (match_dup 2)))])]) + +; ??? need to delve into combine to find out why this is not useful. +; We'd like to be able to grok various C idioms for carry bit usage. 
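+; Two such carry idioms, as a hypothetical C sketch:
+;   unsigned sum3 (unsigned a, unsigned b, unsigned c)
+;   { unsigned s = a + b; return c + (s < a); }  /* add the carry out of a+b */
+;   unsigned gtu (unsigned a, unsigned b) { return a > b; }
+; Ideally the first would map onto add.f followed by adc, and the second onto
+; a compare followed by materializing the carry, which is what the disabled
+; patterns below are aiming at.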
+;(define_insn "*adc_0" +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_operand:SI 1 "register_operand" "c")))] +; "" +; "adc %0,%1,0" +; [(set_attr "cond" "use") +; (set_attr "type" "cc_arith") +; (set_attr "length" "4")]) +; +;(define_split +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (gtu:SI (match_operand:SI 1 "register_operand" "c") +; (match_operand:SI 2 "register_operand" "c")) +; (match_operand:SI 3 "register_operand" "c"))) +; (clobber (reg CC_REG))] +; "" +; [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) +; (set (match_dup 0) +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_dup 3)))]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + int c = 1; + + if (!register_operand (operands[2], SImode)) + { + operands[1] = force_reg (SImode, operands[1]); + c = 2; + } + if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[c], false)) + operands[c] = force_reg (SImode, operands[c]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[c], Pmode)) + operands[c] = force_reg (SImode, arc_rewrite_small_data (operands[c])); +}") + +; the casesi expander might generate a sub of zero, so we have to recognize it. +; combine should make such an insn go away. +(define_insn_and_split "subsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,w,w,w, w, w, w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0, 0, cL,c,L,I,Cal,Cal, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, c, 0,c,c,0, 0, c,Cal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub%? %0,%1,%2%& + sub%? %0,%1,%2 + rsub%? %0,%2,%1 + sub %0,%1,%2 + rsub %0,%2,%1 + rsub %0,%2,%1 + rsub%? 
%0,%2,%1 + rsub %0,%2,%1 + sub %0,%1,%2" + "reload_completed && get_attr_length (insn) == 8 + && satisfies_constraint_I (operands[1]) + && GET_CODE (PATTERN (insn)) != COND_EXEC" + [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))] + "split_subsi (operands);" + [(set_attr "iscompact" "maybe,false,false,false,false,false,false,false, false") + (set_attr "length" "*,4,4,4,4,4,8,8,8") + (set_attr "predicable" "yes,yes,yes,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +(define_expand "subdi3" + [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (!register_operand (operands[2], DImode)) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_EXPAND_ADDDI) + { + rtx l0 = gen_lowpart (SImode, operands[0]); + rtx h0 = disi_highpart (operands[0]); + rtx l1 = gen_lowpart (SImode, operands[1]); + rtx h1 = disi_highpart (operands[1]); + rtx l2 = gen_lowpart (SImode, operands[2]); + rtx h2 = disi_highpart (operands[2]); + rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG); + + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, cc_c)); + DONE; + } +}) + +(define_insn_and_split "subdi3_i" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w,w,w") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "ci,0,ci,c,!i") + (match_operand:DI 2 "nonmemory_operand" "ci,ci,0,!i,c"))) + (clobber (reg:CC CC_REG))] + "register_operand (operands[1], DImode) + || register_operand (operands[2], DImode)" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + rtx l1 = operand_subword (operands[1], lo, 0, DImode); + rtx h1 = operand_subword (operands[1], hi, 0, DImode); + rtx l2 = operand_subword (operands[2], lo, 0, DImode); + rtx h2 = operand_subword (operands[2], hi, 0, DImode); + + if (rtx_equal_p (l0, h1) || rtx_equal_p (l0, h2)) + { + h1 = simplify_gen_binary (MINUS, SImode, h1, h2); + if (!rtx_equal_p (h0, h1)) + emit_insn (gen_rtx_SET (VOIDmode, h0, h1)); + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), + gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, -1)))); + DONE; + } + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, gen_rtx_REG (CCmode, CC_REG))); + DONE; +} + [(set_attr "cond" "clob") + (set_attr "length" "16,16,16,20,20")]) + +(define_insn "*sbc_0" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (minus:SI (match_operand:SI 1 "register_operand" "c") + (ltu:SI (match_operand:CC_C 2 "cc_use_register") + (const_int 0))))] + "" + "sbc %0,%1,0" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4")]) + +; w/c/c comes first (rather than Rcw/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional execution +; is used where possible. 
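+; A minimal C sketch of what exercises the sub.f / sbc pairing emitted by the
+; subdi3 expander above (hypothetical example):
+;   unsigned long long sub64 (unsigned long long a, unsigned long long b)
+;   { return a - b; }
+; The low words are subtracted with sub.f to produce the borrow, and the high
+; words then consume it via sbc, as in the insn below.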
+(define_insn_and_split "sbc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (minus:SI (minus:SI (match_operand:SI 1 "nonmemory_operand" + "c,0,c,0,cCal") + (ltu:SI (match_operand:CC_C 3 "cc_use_register") + (const_int 0))) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sbc %0,%1,%2 + sub.cs %0,%1,1 + sbc %0,%1,%2 + sbc %0,%1,%2 + sbc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 4)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))))] + "operands[4] = simplify_gen_binary (MINUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +(define_insn "sub_f" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c"))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w") + (minus:SI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + sub.f %0,%1,%2" + [(set_attr "type" "compare") + (set_attr "length" "4,4,4,4,8,8")]) + +; combine won't work when an intermediate result is used later... +; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand 3 "" "") (match_operand 4 "" "")) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "!reg_overlap_mentioned_p (operands[3], operands[1]) + && !reg_overlap_mentioned_p (operands[3], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (match_dup 4))]) + +(define_insn "*add_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,W,W,w,w") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "Rcqq,c,c,c,c,c") + (match_operand:SI 2 "_2_4_8_operand" "")) + (match_operand:SI 3 "nonmemory_operand" "0,0,c,?Cal,?c,??Cal")))] + "" + "add%z2%? 
%0,%3,%1%&" + [(set_attr "type" "shift") + (set_attr "length" "*,4,4,8,4,8") + (set_attr "predicable" "yes,yes,no,no,no,no") + (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond") + (set_attr "iscompact" "maybe,false,false,false,false,false")]) + +;; N.B. sub[123] has the operands of the MINUS in the opposite order from +;; what synth_mult likes. +(define_insn "*sub_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") + (mult:SI (match_operand:SI 2 "register_operand" "c,c,c") + (match_operand:SI 3 "_2_4_8_operand" ""))))] + "" + "sub%z3%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond") + (set_attr "iscompact" "false")]) + +; ??? check if combine matches this. +(define_insn "*bset" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bset%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bxor" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bxor%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bclr" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bclr%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? FIXME: find combine patterns for bmsk. + +;;Following are the define_insns added for the purpose of peephole2's + +; see also iorsi3 for use with constant bit number. +(define_insn "*bset_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bset%? %0,%1,%2 ;;peep2, constr 1 + bset %0,%1,%2 ;;peep2, constr 2 + bset %0,%S1,%2 ;;peep2, constr 3" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also xorsi3 for use with constant bit number. +(define_insn "*bxor_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bxor%? %0,%1,%2 + bxor %0,%1,%2 + bxor %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bclr_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,rL,r"))) + (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))] + "" + "@ + bclr%? 
%0,%1,%2 + bclr %0,%1,%2 + bclr %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bmsk_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (plus:SI (ashift:SI (const_int 1) + (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r") + (const_int 1))) + (const_int -1))))] + "" + "@ + bmsk%? %0,%S1,%2 + bmsk %0,%1,%2 + bmsk %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +;;Instructions added for peephole2s end + +;; Boolean instructions. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "if (!satisfies_constraint_Cux (operands[2])) + operands[1] = force_reg (SImode, operands[1]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + operands[1] = arc_rewrite_small_data (operands[1]);") + +(define_insn "andsi3_i" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcqq,Rcqq,Rcw,Rcw,Rcw,Rcw,Rcw,Rcw, w, w, w, w,w,Rcw, w, W") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,Rcq, 0, 0,Rcqq, 0, c, 0, 0, 0, 0, c, c, c, c,0, 0, c, o") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, C1p, Ccp, Cux, cL, 0,C1p,Ccp,CnL, I, Lc,C1p,Ccp,CnL,I,Cal,Cal,Cux")))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* +{ + switch (which_alternative) + { + case 0: case 5: case 10: case 11: case 15: case 16: case 17: + return \"and%? %0,%1,%2%&\"; + case 1: case 6: + return \"and%? %0,%2,%1%&\"; + case 2: case 7: case 12: + return \"bmsk%? %0,%1,%Z2%&\"; + case 3: case 8: case 13: + return \"bclr%? %0,%1,%M2%&\"; + case 4: + return (INTVAL (operands[2]) == 0xff + ? \"extb%? %0,%1%&\" : \"extw%? %0,%1%&\"); + case 9: case 14: return \"bic%? %0,%1,%n2-1\"; + case 18: + if (TARGET_BIG_ENDIAN) + { + rtx xop[2]; + + xop[0] = operands[0]; + xop[1] = adjust_address (operands[1], QImode, + INTVAL (operands[2]) == 0xff ? 3 : 2); + output_asm_insn (INTVAL (operands[2]) == 0xff + ? \"ldb %0,%1\" : \"ldw %0,%1\", + xop); + return \"\"; + } + return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ldw %0,%1\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "iscompact" "maybe,maybe,maybe,maybe,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,load") + (set_attr "length" "*,*,*,*,*,4,4,4,4,4,4,4,4,4,4,4,8,8,*") + (set_attr "predicable" "no,no,no,no,no,yes,yes,yes,yes,yes,no,no,no,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,nocond,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +; combiner splitter, pattern found in ldtoa.c . 
+; and op3,op0,op1 / cmp op3,op2 -> add op3,op0,op4 / bmsk.f 0,op3,op1 +(define_split + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_operand:SI 0 "register_operand" "") + (match_operand 1 "const_int_operand" "")) + (match_operand 2 "const_int_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "((INTVAL (operands[1]) + 1) & INTVAL (operands[1])) == 0" + [(set (match_dup 3) + (plus:SI (match_dup 0) (match_dup 4))) + (set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_dup 3) (match_dup 1)) + (const_int 0)))] + "operands[4] = GEN_INT ( -(~INTVAL (operands[1]) | INTVAL (operands[2])));") + +;;bic define_insn that allows limm to be the first operand +(define_insn "*bicsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c")) + (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))] + "" + "@ + bic%? %0, %2, %1%& ;;constraint 0 + bic%? %0,%2,%1 ;;constraint 1 + bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? + bic%? %0,%2,%S1 ;;constraint 3, FIXME: will it ever get generated ??? + bic %0,%2,%1 ;;constraint 4 + bic %0,%2,%S1 ;;constraint 5, FIXME: will it ever get generated ??? + bic %0,%S2,%1 ;;constraint 6" + [(set_attr "length" "*,4,4,8,4,8,8") + (set_attr "iscompact" "maybe, false, false, false, false, false, false") + (set_attr "predicable" "no,yes,no,yes,no,no,no") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse,nocond,nocond,nocond")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcw,Rcw,Rcw,Rcw,w, w,w,Rcw, w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "% 0,Rcq, 0, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C0p, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 3: case 6: case 7: case 9: case 10: case 11: + return \"or%? %0,%1,%2%&\"; + case 1: case 4: + return \"or%? %0,%2,%1%&\"; + case 2: case 5: case 8: + return \"bset%? %0,%1,%z2%&\"; + default: + gcc_unreachable (); + }" + [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "length" "*,*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") + (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 2: case 5: case 6: case 8: case 9: case 10: + return \"xor%? %0,%1,%2%&\"; + case 1: case 3: + return \"xor%? %0,%2,%1%&\"; + case 4: case 7: + return \"bxor%? %0,%1,%z2\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary") + (set_attr "length" "*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w") + (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))] + "" + "neg%? 
%0,%1%&" + [(set_attr "type" "unary") + (set_attr "iscompact" "maybe,true,false,false") + (set_attr "predicable" "no,no,yes,no")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") + (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))] + "" + "not%? %0,%1%&" + [(set_attr "type" "unary,unary") + (set_attr "iscompact" "true,false")]) + +(define_insn_and_split "one_cmpldi2" + [(set (match_operand:DI 0 "dest_reg_operand" "=q,w") + (not:DI (match_operand:DI 1 "register_operand" "q,c")))] + "" + "#" + "&& reload_completed" + [(set (match_dup 2) (not:SI (match_dup 3))) + (set (match_dup 4) (not:SI (match_dup 5)))] +{ + int swap = (true_regnum (operands[0]) == true_regnum (operands[1]) + 1); + + operands[2] = operand_subword (operands[0], 0+swap, 0, DImode); + operands[3] = operand_subword (operands[1], 0+swap, 0, DImode); + operands[4] = operand_subword (operands[0], 1-swap, 0, DImode); + operands[5] = operand_subword (operands[1], 1-swap, 0, DImode); +} + [(set_attr "type" "unary,unary") + (set_attr "cond" "nocond,nocond") + (set_attr "length" "4,8")]) + +;; Shift instructions. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (ASHIFT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (ASHIFTRT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (LSHIFTRT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_insn "shift_si3" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") + (match_operator:SI 3 "shift4_operator" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")])) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (reg:CC CC_REG)) + ] + "!TARGET_BARREL_SHIFTER" + "* return output_shift (operands);" + [(set_attr "type" "shift") + (set_attr "length" "16")]) + +(define_insn "shift_si3_loop" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rn,Cal")])) + (clobber (match_scratch:SI 4 "=X,X")) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (clobber (reg:CC CC_REG)) + ] + "!TARGET_BARREL_SHIFTER" + "* return output_shift (operands);" + [(set_attr "type" "shift") + (set_attr "length" "16,20")]) + +; asl, asr, lsr patterns: +; There is no point in including an 'I' alternative since only the lowest 5 +; bits are used for the shift. OTOH Cal can be useful if the shift amount +; is defined in an external symbol, as we don't have special relocations +; to truncate a symbol in a u6 immediate; but that's rather exotic, so only +; provide one alternatice for this, without condexec support. 
+(define_insn "*ashlsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asl%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*ashrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asr%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*lshrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p () + ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") + (rotatert:SI (match_operand:SI 1 "register_operand" " 0,cL,cCal") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER" + "ror%? %0,%1,%2" + [(set_attr "type" "shift,shift,shift") + (set_attr "predicable" "yes,no,no") + (set_attr "length" "4,4,8")]) + +;; Compare / branch instructions. + +(define_expand "cbranchsi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +;; ??? Could add a peephole to generate compare with swapped operands and +;; modifed cc user if second, but not first operand is a compact register. +(define_insn "cmpsi_cc_insn_mixed" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,c,c, qRcq, c") + (match_operand:SI 1 "nonmemory_operand" "cO,cI,cL, Cal, Cal")))] + "" + "cmp%? 
%0,%B1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,false,true_limm,false") + (set_attr "predicable" "no,no,yes,no,yes") + (set_attr "cond" "set") + (set_attr "length" "*,4,4,*,8")]) + +(define_insn "*cmpsi_cc_zn_insn" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c") + (const_int 0)))] + "" + "tst%? %0,%0%&" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "*,4")]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*btst" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "L,Lc")) + (const_int 0)))] + "" + "btst%? %0,%1" + [(set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "*,4")]) + +; combine suffers from 'simplifications' that replace a one-bit zero +; extract with a shift if it can prove that the upper bits are zero. +; arc_reorg sees the code after sched2, which can have caused our +; inputs to be clobbered even if they were not clobbered before. +; Therefore, add a third way to convert btst / b{eq,ne} to bbit{0,1} +; OTOH, this is somewhat marginal, and can leat to out-of-range +; bbit (i.e. bad scheduling) and missed conditional execution, +; so make this an option. +(define_peephole2 + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0))) + (set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(reg:CC_ZN CC_REG) (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_BBIT_PEEPHOLE && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (pc) + (if_then_else + (match_op_dup 3 + [(zero_extract:SI (match_dup 0) + (const_int 1) (match_dup 1)) + (const_int 0)]) + (label_ref (match_dup 2)) + (pc))) + (clobber (reg:CC_ZN CC_REG))])]) + +(define_insn "*cmpsi_cc_z_insn" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c") + (match_operand:SI 1 "p2_immediate_operand" "O,n")))] + "" + "@ + cmp%? %0,%1%& + bxor.f 0,%0,%z1" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "cond" "set,set_zn") + (set_attr "length" "*,4")]) + +(define_insn "*cmpsi_cc_c_insn" + [(set (reg:CC_C CC_REG) + (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq, c,Rcqq, c") + (match_operand:SI 1 "nonmemory_operand" "cO, cI, Cal,Cal")))] + "" + "cmp%? %0,%S1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,true_limm,false") + (set_attr "cond" "set") + (set_attr "length" "*,4,*,8")]) + +;; Next come the scc insns. 
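+; A minimal sketch (register names illustrative) of what the scc expansion
+; typically produces for r0 = (r1 == r2):
+;   cmp    r1,r2         ; set CC from the comparison
+;   mov    r0,1          ; assume the condition holds
+;   sub.ne r0,r0,r0      ; reversed condition clears r0 when it does not
+; i.e. *scc_insn materializes 1 unconditionally and then conditionally
+; clears it under the reversed condition.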
+ +(define_expand "cstoresi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 2 "nonmemory_operand" "") + (match_operand:SI 3 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]))] + "" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +(define_mode_iterator SDF [SF DF]) + +(define_expand "cstore4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "comparison_operator" [(reg CC_REG) + (const_int 0)]))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. +(define_expand "scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") (match_operand:SI 1 ""))]) + +(define_insn_and_split "*scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (match_operator:SI 1 "proper_comparison_operator" [(reg CC_REG) (const_int 0)]))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (const_int 1)) + (cond_exec + (match_dup 1) + (set (match_dup 0) (const_int 0)))] +{ + operands[1] + = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[1]), + GET_MODE (XEXP (operands[1], 0))), + VOIDmode, + XEXP (operands[1], 0), XEXP (operands[1], 1)); +} + [(set_attr "type" "unary")]) + +;; ??? At least for ARC600, we should use sbc b,b,s12 if we want a value +;; that is one lower if the carry flag is set. + +;; ??? Look up negscc insn. See pa.md for example. 
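+; Illustrative note (a sketch, not verbatim from the templates below): the
+; negated scc form materializes -1 and conditionally clears it, e.g. for
+; r0 = -(r1 < r2), roughly
+;   cmp    r1,r2
+;   mov    r0,-1
+;   sub.ge r0,r0,r0      ; inverse condition clears the result
+; leaving -1 when the comparison holds and 0 otherwise.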
+(define_insn "*neg_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (neg:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,-1\;sub.%D1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "*not_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (not:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,1\;sub.%d1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +; cond_exec patterns +(define_insn "*movsi_ne" + [(cond_exec + (ne (match_operand:CC_Z 2 "cc_use_register" "Rcc,Rcc,Rcc") (const_int 0)) + (set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") + (match_operand:SI 1 "nonmemory_operand" "C_0,Lc,?Cal")))] + "" + "@ + * current_insn_predicate = 0; return \"sub%?.ne %0,%0,%0%&\"; + mov.ne %0,%1 + mov.ne %0,%S1" + [(set_attr "type" "cmove,cmove,cmove") + (set_attr "iscompact" "true,false,false") + (set_attr "length" "2,4,8")]) + +(define_insn "*movsi_cond_exec" + [(cond_exec + (match_operator 3 "proper_comparison_operator" + [(match_operand 2 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operand:SI 1 "nonmemory_operand" "Lc,?Cal")))] + "" + "mov.%d3 %0,%S1" + [(set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +(define_insn "*commutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,?Cal")])))] + "" +{ + arc_output_commutative_cond_exec (operands, true); + return ""; +} + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr_alternative "length" + [(const_int 4) + (cond + [(eq (symbol_ref "arc_output_commutative_cond_exec (operands, false)") + (const_int 4)) + (const_int 4)] + (const_int 8))])]) + +(define_insn "*sub_cond_exec" + [(cond_exec + (match_operator 4 "proper_comparison_operator" + [(match_operand 3 "cc_register" "Rcc,Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,cL,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,0,0"))))] + "" + "@ + sub.%d4 %0,%1,%2 + rsub.%d4 %0,%2,%1 + rsub.%d4 %0,%2,%1" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,4,8")]) + +(define_insn "*noncommutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "noncommutative_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,Cal")])))] + "" + "%O3.%d5 %0,%1,%2" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +;; These control RTL generation for conditional jump insns +;; Match both normal and inverted jump. + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. 
+(define_expand "branch_insn" + [(set (pc) + (if_then_else (match_operand 1 "" "") + (label_ref (match_operand 0 "" "")) + (pc)))]) + +; When estimating sizes during arc_reorg, when optimizing for speed, there +; are three reasons why we need to consider branches to be length 6: +; - annull-false delay slot insns are implemented using conditional execution, +; thus preventing short insn formation where used. +; - for ARC600: annull-true delay slot isnns are implemented where possile +; using conditional execution, preventing short insn formation where used. +; - for ARC700: likely or somewhat likely taken branches are made long and +; unaligned if possible to avoid branch penalty. +(define_insn "*branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], false, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%d1%? %^%l0%&\"; + else + return \"b%d1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], true, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%D1%? %^%l0\"; + else + return \"b%D1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +;; Unconditional and other jump instructions. 
+ +(define_expand "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "") + +(define_insn "jump_i" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "!TARGET_LONG_CALLS_SET || !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "b%!%* %^%l0%&" + [(set_attr "type" "uncond_branch") + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 2") + (const_string "true") (const_string "false"))) + (set_attr "cond" "canuse") + (set (attr "length") + (cond [ + ; In arc_reorg we just guesstimate; might be more or less than 4. + (match_test "arc_branch_size_unknown_p ()") + (const_int 4) + + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (match_test "find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (const_int 4) + + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4)] + (const_int 2)))]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,false,false,maybe,false") + (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse")]) + +;; Implement a switch statement. + +(define_expand "casesi" + [(set (match_dup 5) + (minus:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (reg:CC CC_REG) + (compare:CC (match_dup 5) + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else (gtu (reg:CC CC_REG) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_dup 6) + (unspec:SI [(match_operand 3 "" "") + (match_dup 5) (match_dup 7)] UNSPEC_CASESI)) + (parallel [(set (pc) (match_dup 6)) (use (match_dup 7))])] + "" + " +{ + rtx x; + + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = operands[3]; + emit_insn (gen_subsi3 (operands[5], operands[0], operands[1])); + emit_insn (gen_cmpsi_cc_insn_mixed (operands[5], operands[2])); + x = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REG), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[4]), pc_rtx); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + if (TARGET_COMPACT_CASESI) + { + emit_jump_insn (gen_casesi_compact_jump (operands[5], operands[7])); + } + else + { + operands[3] = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + if (flag_pic || !cse_not_expected) + operands[3] = force_reg (Pmode, operands[3]); + emit_insn (gen_casesi_load (operands[6], + operands[3], operands[5], operands[7])); + if (CASE_VECTOR_PC_RELATIVE || flag_pic) + emit_insn (gen_addsi3 (operands[6], operands[6], operands[3])); + emit_jump_insn (gen_casesi_jump (operands[6], operands[7])); + } + DONE; +}") + +(define_insn "casesi_load" + [(set (match_operand:SI 0 "register_operand" "=Rcq,r,r") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "Rcq,c,Cal") + (match_operand:SI 2 "register_operand" "Rcq,c,c") + (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))] + "" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[3])); + + if (GET_CODE (diff_vec) != ADDR_DIFF_VEC) + { + gcc_assert (GET_CODE (diff_vec) == ADDR_VEC); + gcc_assert (GET_MODE (diff_vec) == SImode); + gcc_assert (!CASE_VECTOR_PC_RELATIVE && !flag_pic); + } + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"ld.as %0,[%1,%2]%&\"; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldw.as 
%0,[%1,%2]\"; + return \"ldw.x.as %0,[%1,%2]\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldb%? %0,[%1,%2]%&\"; + return \"ldb.x %0,[%1,%2]\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load") + (set_attr_alternative "iscompact" + [(cond + [(ne (symbol_ref "GET_MODE (PATTERN (next_nonnote_insn (operands[3])))") + (symbol_ref "QImode")) + (const_string "false") + (match_test "!ADDR_DIFF_VEC_FLAGS (PATTERN (next_nonnote_insn (operands[3]))).offset_unsigned") + (const_string "false")] + (const_string "true")) + (const_string "false") + (const_string "false")]) + (set_attr_alternative "length" + [(cond + [(eq_attr "iscompact" "false") (const_int 4) + ; We have to mention (match_dup 3) to convince genattrtab.c that this + ; is a varying length insn. + (eq (symbol_ref "1+1") (const_int 2)) (const_int 2) + (gt (minus (match_dup 3) (pc)) (const_int 42)) (const_int 4)] + (const_int 2)) + (const_int 4) + (const_int 8)])]) + +; Unlike the canonical tablejump, this pattern always uses a jump address, +; even for CASE_VECTOR_PC_RELATIVE. +(define_insn "casesi_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c")) + (use (label_ref (match_operand 1 "" "")))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,maybe,false") + (set_attr "cond" "canuse")]) + +(define_insn "casesi_compact_jump" + [(set (pc) + (unspec:SI [(match_operand:SI 0 "register_operand" "c,q")] + UNSPEC_CASESI)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_scratch:SI 2 "=q,0"))] + "TARGET_COMPACT_CASESI" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[1])); + int unalign = arc_get_unalign (); + rtx xop[3]; + const char *s; + + xop[0] = operands[0]; + xop[2] = operands[2]; + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + /* Max length can be 12 in this case, but this is OK because + 2 of these are for alignment, and are anticipated in the length + of the ADDR_DIFF_VEC. 
*/ + if (unalign && !satisfies_constraint_Rcq (xop[0])) + s = \"add2 %2,pcl,%0\n\tld_s%2,[%2,12]\"; + else if (unalign) + s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\"; + else + s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\"; + arc_clear_unalign (); + break; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + else + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.x.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s.x %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + arc_toggle_unalign (); + break; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if ((rtx_equal_p (xop[2], xop[0]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\"; + xop[1] = GEN_INT (8 + unalign); + } + else + { + s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + arc_toggle_unalign (); + } + } + else if ((rtx_equal_p (xop[0], xop[2]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\"; + xop[1] = GEN_INT (10 - unalign); + arc_toggle_unalign (); + } + else + { + /* ??? Length is 12. */ + s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\"; + xop[1] = GEN_INT (8 + unalign); + } + break; + default: + gcc_unreachable (); + } + output_asm_insn (s, xop); + return \"add_s %2,%2,pcl\n\tj_s%* [%2]\"; +}" + [(set_attr "length" "10") + (set_attr "type" "jump") + (set_attr "iscompact" "true") + (set_attr "cond" "nocond")]) + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI 31))])] + "" + "{ + rtx callee; + + gcc_assert (MEM_P (operands[0])); + callee = XEXP (operands[0], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_prof (gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[1])); + DONE; + } + /* This is to decide if we should generate indirect calls by loading the + 32 bit address of the callee into a register before performing the + branch and link - this exposes cse opportunities. + Also, in weird cases like compile/20010107-1.c, we may get a PLUS. */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + } +") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. +(define_insn "*call_i" + [(call (mem:SI (match_operand:SI 0 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%0]%& + jl%!%* [%0]%& + jl%!%* [%0] + bl%!%* %P0 + bl%!%* %P0 + jl%!%* %S0 + jl%* %S0 + jl%! 
%S0" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + +(define_insn "call_prof" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P0;2 + jl%! %^%S0" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "dest_reg_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI 31))])] + "" + " + { + rtx callee; + + gcc_assert (MEM_P (operands[1])); + callee = XEXP (operands[1], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_value_prof (operands[0], + gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[2])); + DONE; + } + /* See the comment in define_expand \"call\". */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. +(define_insn "*call_value_i" + [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w,w,w, w") + (call (mem:SI (match_operand:SI 1 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%1]%& + jl%!%* [%1]%& + jl%!%* [%1] + bl%!%* %P1;1 + bl%!%* %P1;1 + jl%!%* %S1 + jl%* %S1 + jl%! %S1" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + +; There is a bl_s instruction (16 bit opcode branch-and-link), but we can't +; use it for lack of inter-procedural branch shortening. +; Link-time relaxation would help... + + +(define_insn "call_value_prof" + [(set (match_operand 0 "dest_reg_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "symbolic_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P1;1 + jl%! %^%S1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop%?" + [(set_attr "type" "misc") + (set_attr "iscompact" "true") + (set_attr "cond" "canuse") + (set_attr "length" "2")]) + +;; Special pattern to flush the icache. +;; ??? Not sure what to do here. Some ARC's are known to support this. + +(define_insn "flush_icache" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 0)] + "" + "* return \"\";" + [(set_attr "type" "misc")]) + +;; Split up troublesome insns for better scheduling. + +;; Peepholes go at the end. +;;asl followed by add can be replaced by an add{1,2,3} +;; Three define_peepholes have been added for this optimization +;; ??? This used to target non-canonical rtl. Now we use add_n, which +;; can be generated by combine. Check if these peepholes still provide +;; any benefit. 
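+; Concrete sketch of the rewrite these peepholes aim for (assuming the
+; ARC scaled-add semantics, where addN a,b,c computes b + (c << N)):
+;   asl  r0,r1,2         ; r0 = r1 << 2
+;   add  r3,r4,r0        ; r3 = r4 + r0
+; becomes, when r0 is dead afterwards,
+;   add2 r3,r4,r1
+; The schematic patterns follow.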
+ +;; ------------------------------------------------------------- +;; Pattern 1 : r0 = r1 << {i} +;; r3 = r4/INT + r0 ;;and commutative +;; || +;; \/ +;; add{i} r3,r4/INT,r1 +;; ------------------------------------------------------------- +;; ??? This should be covered by combine, alas, at times combine gets +;; too clever for it's own good: when the shifted input is known to be +;; either 0 or 1, the operation will be made into an if-then-else, and +;; thus fail to match the add_n pattern. Example: _mktm_r, line 85 in +;; newlib/libc/time/mktm_r.c . + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "dest_reg_operand" "") + (plus:SI (match_operand:SI 4 "nonmemory_operand" "") + (match_operand:SI 5 "nonmemory_operand" "")))] + "(INTVAL (operands[2]) == 1 + || INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 3) + && (true_regnum (operands[4]) == true_regnum (operands[0]) + || true_regnum (operands[5]) == true_regnum (operands[0])) + && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[3]) == true_regnum (operands[0])))" + ;; the preparation statements take care to put proper operand in operands[4] + ;; operands[4] will always contain the correct operand. This is added to satisfy commutativity + [(set (match_dup 3) + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 4)))] + "if (true_regnum (operands[4]) == true_regnum (operands[0])) + operands[4] = operands[5]; + operands[2] = GEN_INT (1 << INTVAL (operands[2]));" +) + +;; ------------------------------------------------------------- +;; Pattern 1 : r0 = r1 << {i} +;; r3 = r4 - r0 +;; || +;; \/ +;; sub{i} r3,r4,r1 +;; ------------------------------------------------------------- +;; ??? This should be covered by combine, alas, at times combine gets +;; too clever for it's own good: when the shifted input is known to be +;; either 0 or 1, the operation will be made into an if-then-else, and +;; thus fail to match the sub_n pattern. Example: __ieee754_yn, line 239 in +;; newlib/libm/math/e_jn.c . + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "dest_reg_operand" "") + (minus:SI (match_operand:SI 4 "nonmemory_operand" "") + (match_dup 0)))] + "(INTVAL (operands[2]) == 1 + || INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 3) + && (peep2_reg_dead_p (2, operands[0]) + || (true_regnum (operands[3]) == true_regnum (operands[0])))" + [(set (match_dup 3) + (minus:SI (match_dup 4) + (mult:SI (match_dup 1) + (match_dup 2))))] + "operands[2] = GEN_INT (1 << INTVAL (operands[2]));" +) + + + +; When using the high single bit, the result of a multiply is either +; the original number or zero. But MPY costs 4 cycles, which we +; can replace with the 2 cycles for the pair of TST_S and ADD.NE. 
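+; Sketch of the replacement (operand names and exact instruction selection
+; are illustrative): for r4 = (r1 >> 31) * r2 the logical shift yields 0
+; or 1, so the product is either r2 or 0, and a sequence along the lines of
+;   lsr.f  r4,r1,31      ; r4 = 0 or 1; Z set when the sign bit is clear
+;   mov.ne r4,r2         ; sign bit set: take r2; otherwise r4 stays 0
+; gives the same result without the multiplier, subject to the
+; operand-overlap checks in the peepholes below.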
+(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (const_int 31))) + (set (match_operand:SI 4 "register_operand" "") + (mult:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand" "")))] + "TARGET_ARC700 && !TARGET_NOMPY_SET + && (rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[0], operands[3])) + && peep2_regno_dead_p (0, CC_REG) + && (rtx_equal_p (operands[0], operands[4]) + || (peep2_reg_dead_p (2, operands[0]) + && peep2_reg_dead_p (1, operands[4])))" + [(parallel [(set (reg:CC_Z CC_REG) + (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31)) + (const_int 0))) + (set (match_dup 4) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + (cond_exec + (ne (reg:CC_Z CC_REG) (const_int 0)) + (set (match_dup 4) (match_dup 5)))] +{ + if (!rtx_equal_p (operands[0], operands[2])) + operands[5] = operands[2]; + else if (!rtx_equal_p (operands[0], operands[3])) + operands[5] = operands[3]; + else + operands[5] = operands[4]; /* Actually a no-op... presumably rare. */ +}) + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (const_int 31))) + (set (match_operand:SI 4 "register_operand" "") + (mult:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand" "")))] + "TARGET_ARC700 && !TARGET_NOMPY_SET + && (rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[0], operands[3])) + && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (reg:CC_Z CC_REG) + (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31)) + (const_int 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + (set (match_dup 4) (match_dup 5)) + (cond_exec + (eq (reg:CC_Z CC_REG) (const_int 0)) + (set (match_dup 4) (const_int 0)))] + "operands[5] = operands[rtx_equal_p (operands[0], operands[2]) ? 
3 : 2];") + +;; Instructions generated through builtins + +(define_insn "clrsbsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "@ + norm \t%0, %1 + norm \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn "norm_f" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal"))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_dup 1) (const_int 0)))] + "TARGET_NORM" + "@ + norm.f\t%0, %1 + norm.f\t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn_and_split "clrsbhi2" + [(set (match_operand:HI 0 "dest_reg_operand" "=w,w") + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "#" + "reload_completed" + [(set (match_dup 0) (zero_extend:SI (clrsb:HI (match_dup 1))))] + "operands[0] = simplify_gen_subreg (SImode, operands[0], HImode, 0);") + +(define_insn "normw" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (zero_extend:SI + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal"))))] + "TARGET_NORM" + "@ + normw \t%0, %1 + normw \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_expand "clzsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (clz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + emit_insn (gen_norm_f (operands[0], operands[1])); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + plus_constant (SImode, operands[0], 1)))); + DONE; +}) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + rtx temp = operands[0]; + + if (reg_overlap_mentioned_p (temp, operands[1]) + || (REGNO (temp) < FIRST_PSEUDO_REGISTER + && !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], + REGNO (temp)))) + temp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (temp, operands[1], constm1_rtx)); + emit_insn (gen_bic_f_zn (temp, temp, operands[1])); + emit_insn (gen_clrsbsi2 (temp, temp)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], GEN_INT (32)))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_MINUS (SImode, GEN_INT (31), temp)))); + DONE; +}) + + +(define_insn "swap" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (unspec:SI [(match_operand:SI 1 "general_operand" "L,Cal,c")] + UNSPEC_SWAP))] + "TARGET_SWAP" + "@ + swap \t%0, %1 + swap \t%0, %S1 + swap \t%0, %1" + [(set_attr "length" "4,8,4") + (set_attr "type" "two_cycle_core,two_cycle_core,two_cycle_core")]) + +;; FIXME: an intrinsic for multiply is daft. Can we remove this? +(define_insn "mul64" + [(unspec [(match_operand:SI 0 "general_operand" "q,r,r,%r") + (match_operand:SI 1 "general_operand" "q,rL,I,Cal")] + UNSPEC_MUL64)] + "TARGET_MUL64_SET" + "@ + mul64%? \t0, %0, %1%& + mul64%? \t0, %0, %1 + mul64 \t0, %0, %1 + mul64%? 
\t0, %0, %S1" + [(set_attr "length" "2,4,4,8") + (set_attr "iscompact" "true,false,false,false") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,canuse, nocond, canuse")]) + +(define_insn "mulu64" + [(unspec [(match_operand:SI 0 "general_operand" "%r,r,r,r") + (match_operand:SI 1 "general_operand" "rL,I,r,Cal")] + UNSPEC_MULU64)] + "TARGET_MUL64_SET" + "@ + mulu64%? \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64%? \t0, %0, %S1" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,nocond,nocond,canuse")]) + +(define_insn "divaw" + [(set (match_operand:SI 0 "dest_reg_operand" "=&w,&w,&w") + (unspec:SI [(div:SI (match_operand:SI 1 "general_operand" "r,Cal,r") + (match_operand:SI 2 "general_operand" "r,r,Cal"))] + UNSPEC_DIVAW))] + "TARGET_ARC700 || TARGET_EA_SET" + "@ + divaw \t%0, %1, %2 + divaw \t%0, %S1, %2 + divaw \t%0, %1, %S2" + [(set_attr "length" "4,8,8") + (set_attr "type" "divaw,divaw,divaw")]) + +(define_insn "flag" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "rL,I,Cal")] + VUNSPEC_FLAG)] + "" + "@ + flag%? %0 + flag %0 + flag%? %S0" + [(set_attr "length" "4,4,8") + (set_attr "type" "misc,misc,misc") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "clob,clob,clob")]) + +(define_insn "brk" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_BRK)] + "" + "brk" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "rtie" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_RTIE)] + "" + "rtie" + [(set_attr "length" "4") + (set_attr "type" "misc") + (set_attr "cond" "clob")]) + +(define_insn "sync" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SYNC)] + "" + "sync" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "swi" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SWI)] + "" + "* +{ + if(TARGET_ARC700) + return \"trap0\"; + else + return \"swi\"; +}" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + + +(define_insn "sleep" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L")] + VUNSPEC_SLEEP)] + "check_if_valid_sleep_operand(operands,0)" + "sleep %0" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "core_read" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_READ))] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \t%0, r%1\"; + return \"mov \t%0, r%1\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "core_write" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "r,r") + (match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_WRITE)] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \tr%1, %0\"; + return \"mov \tr%1, %0\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "lr" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "I,HCal,r,D")] + VUNSPEC_LR))] + "" + "lr\t%0, [%1]" + [(set_attr "length" "4,8,4,8") + (set_attr "type" "lr,lr,lr,lr")]) + +(define_insn "sr" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "Cal,r,r,r") + (match_operand:SI 1 "general_operand" "Ir,I,HCal,r")] + VUNSPEC_SR)] + "" + "sr\t%S0, [%1]" + [(set_attr "length" "8,4,8,4") + (set_attr "type" 
"sr,sr,sr,sr")]) + +(define_insn "trap_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L,Cal")] + VUNSPEC_TRAP_S)] + "TARGET_ARC700" +{ + if (which_alternative == 0) + { + arc_toggle_unalign (); + return \"trap_s %0\"; + } + + /* Keep this message in sync with the one in arc.c:arc_expand_builtin, + because *.md files do not get scanned by exgettext. */ + fatal_error (\"operand to trap_s should be an unsigned 6-bit value\"); +} + [(set_attr "length" "2") + (set_attr "type" "misc")]) + +(define_insn "unimp_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_UNIMP_S)] + "TARGET_ARC700" + "unimp_s" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +;; End of instructions generated through builtins + +; Since the demise of REG_N_SETS as reliable data readily available to the +; target, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. +; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (simple_return) + (use (match_operand 2 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[0], 0); + + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + if (crtl->profile && arc_profile_call (callee)) + { + emit_insn (gen_sibcall_prof + (gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[1], operands[2])); + DONE; + } + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "dest_reg_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (simple_return) + (use (match_operand 3 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[1], 0); + + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + if (crtl->profile && arc_profile_call (XEXP (operands[1], 0))) + { + emit_insn (gen_sibcall_value_prof + (operands[0], gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[2], operands[3])); + DONE; + } + if (GET_CODE (callee) != REG && arc_is_longcall_p (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" ""))] + "" + "@ + b%!%* %P0 + b%!%* %P0 + j%!%* [%0]%& + j%!%* [%0] + j%! %P0" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" ""))] + "" + "@ + b%!%* %P1 + b%!%* %P1 + j%!%* [%1]%& + j%!%* [%1] + j%! 
%P1" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_prof" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P0;2 + j%! %^%S0;2" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_value_prof" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P1;1 + j%! %^%S1;1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_expand "prologue" + [(pc)] + "" +{ + arc_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (0); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (1); + DONE; +}) + +; Since the demise of REG_N_SETS, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. +; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_insn "simple_return" + [(simple_return)] + "reload_completed" +{ + rtx reg + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%!%* [%0]%&\", ®); + return \"\"; +} + [(set_attr "type" "return") + ; predicable won't help here since the canonical rtl looks different + ; for branches. + (set_attr "cond" "canuse") + (set (attr "iscompact") + (cond [(eq (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_string "maybe")] + (const_string "false"))) + (set (attr "length") + (cond [(ne (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_int 4)] + (const_int 2)))]) + +(define_insn "p_return_i" + [(set (pc) + (if_then_else (match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (simple_return) (pc)))] + "reload_completed" +{ + rtx xop[2]; + xop[0] = operands[0]; + xop[1] + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%d0%!%# [%1]%&\", xop); + /* record the condition in case there is a delay insn. 
*/ + arc_ccfsm_record_condition (xop[0], false, insn, 0); + return \"\"; +} + [(set_attr "type" "return") + (set_attr "cond" "use") + (set (attr "iscompact") + (cond [(eq (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_string "maybe")] + (const_string "false"))) + (set (attr "length") + (cond [(ne (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_int 4) + (not (match_operand 0 "equality_comparison_operator" "")) + (const_int 4) + (eq_attr "delay_slot_filled" "yes") + (const_int 4)] + (const_int 2)))]) + +(define_insn_and_split "eh_return" + [(eh_return) + (use (match_operand:SI 0 "move_src_operand" "rC32,mCalCpc")) + (clobber (match_scratch:SI 1 "=X,r")) + (clobber (match_scratch:SI 2 "=&r,r"))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 0))] +{ + int offs = arc_return_slot_offset (); + + if (offs < 0) + operands[2] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + else + { + if (!register_operand (operands[0], Pmode) + && !satisfies_constraint_C32 (operands[0])) + { + emit_move_insn (operands[1], operands[0]); + operands[0] = operands[1]; + } + rtx addr = plus_constant (Pmode, stack_pointer_rtx, offs); + if (!strict_memory_address_p (Pmode, addr)) + { + emit_move_insn (operands[2], addr); + addr = operands[2]; + } + operands[2] = gen_frame_mem (Pmode, addr); + } +} + [(set_attr "length" "12")]) + +;; ??? #ifdefs in function.c require the presence of this pattern, with a +;; non-constant predicate. +(define_expand "return" + [(return)] + "optimize < 0") + + ;; Comment in final.c (insn_current_reference_address) says + ;; forward branch addresses are calculated from the next insn after branch + ;; and for backward branches, it is calculated from the branch insn start. + ;; The shortening logic here is tuned to accomodate this behaviour +;; ??? This should be grokked by the ccfsm machinery. +(define_insn "cbranchsi4_scratch" + [(set (pc) + (if_then_else (match_operator 0 "proper_comparison_operator" + [(match_operand:SI 1 "register_operand" "c,c, c") + (match_operand:SI 2 "nonmemory_operand" "L,c,?Cal")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_operand 4 "cc_register" ""))] + "(reload_completed + || (TARGET_EARLY_CBRANCHSI + && brcc_nolimm_operator (operands[0], VOIDmode))) + && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "* + switch (get_attr_length (insn)) + { + case 2: return \"br%d0%? %1, %2, %^%l3%&\"; + case 4: return \"br%d0%* %1, %B2, %^%l3\"; + case 8: if (!brcc_nolimm_operator (operands[0], VOIDmode)) + return \"br%d0%* %1, %B2, %^%l3\"; + case 6: case 10: + case 12:return \"cmp%? %1, %B2\\n\\tb%d0%* %^%l3%&;br%d0 out of range\"; + default: fprintf (stderr, \"unexpected length %d\\n\", get_attr_length (insn)); fflush (stderr); gcc_unreachable (); + } + " + [(set_attr "cond" "clob, clob, clob") + (set (attr "type") + (if_then_else + (match_test "valid_brcc_with_delay_p (operands)") + (const_string "brcc") + (const_string "brcc_no_delay_slot"))) + ; For forward branches, we need to account not only for the distance to + ; the target, but also the difference between pcl and pc, the instruction + ; length, and any delay insn, if present. + (set + (attr "length") + (cond ; the outer cond does a test independent of branch shortening. 
+ [(match_operand 0 "brcc_nolimm_operator" "") + (cond + [(and (match_operand:CC_Z 4 "cc_register") + (eq_attr "delay_slot_filled" "no") + (ge (minus (match_dup 3) (pc)) (const_int -128)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 122) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 2) + (and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 244) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 6)] + (const_int 8))] + (cond [(and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) (const_int 244))) + (const_int 8) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 10)] + (const_int 12)))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) & 2") + (const_string "true") (const_string "false")))]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*bbit" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "L,Lc")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" +{ + switch (get_attr_length (insn)) + { + case 4: return (GET_CODE (operands[3]) == EQ + ? \"bbit0%* %1,%2,%0\" : \"bbit1%* %1,%2,%0\"); + case 6: + case 8: return \"btst%? %1,%2\n\tb%d3%* %0; bbit out of range\"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "brcc") + (set_attr "cond" "clob") + (set (attr "length") + (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) + (minus (const_int 248) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (eq (symbol_ref "which_alternative") (const_int 0)) + (const_int 6)] + (const_int 8))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 6") + (const_string "true") (const_string "false")))]) + +; ??? When testing a bit from a DImode register, combine creates a +; zero_extract in DImode. This goes via an AND with a DImode constant, +; so can only be observed on 64 bit hosts. +(define_insn_and_split "*bbit_di" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:DI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand 2 "immediate_operand" "L,L")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "#" + "" + [(parallel + [(set (pc) (if_then_else (match_dup 3) (label_ref (match_dup 0)) (pc))) + (clobber (reg:CC_ZN CC_REG))])] +{ + rtx xtr; + + xtr = gen_rtx_ZERO_EXTRACT (SImode, operands[1], const1_rtx, operands[2]); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + xtr, const0_rtx); +}) + +; operand 0 is the loop count pseudo register +; operand 1 is the loop end pattern +(define_expand "doloop_begin" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "" ""))] + "" +{ + /* Using the INSN_UID of the loop end pattern to identify it causes + trouble with -fcompare-debug, so allocate a debug-independent + id instead. 
We use negative numbers so that we can use the same + slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and + still be able to tell what kind of number this is. */ + static HOST_WIDE_INT loop_end_id = 0; + + rtx id = GEN_INT (--loop_end_id); + XEXP (XVECEXP (PATTERN (operands[1]), 0, 4), 0) = id; + emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id, + const0_rtx, const0_rtx)); + DONE; +}) + +; ??? can't describe the insn properly as then the optimizers try to +; hoist the SETs. +;(define_insn "doloop_begin_i" +; [(set (reg:SI LP_START) (pc)) +; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_LP)) +; (use (match_operand 0 "const_int_operand" "n"))] +; "" +; "lp .L__GCC__LP%0" +;) + +; The operands of doloop_end_i are also read / written by arc_reorg with +; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you +; might have to adjust arc_reorg. +; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in +; by arc_reorg. arc_reorg might also alter operand 0. +; +; N in XVECEXP PATTERN (lp, 0 N) +; V rtl purpose +; 0 unspec UNSPEC_LP identify pattern +; 1 clobber LP_START show LP_START is set +; 2 clobber LP_END show LP_END is set +; 3 use operand0 loop count pseudo register +; 4 use operand1 before arc_reorg: -id +; after : CODE_LABEL_NUMBER of loop top label +; 5 use operand2 INSN_UID of loop end insn +; 6 use operand3 loop setup not at start (1 above, 2 below) +; 7 use operand4 LABEL_REF of top label, if not +; immediately following +; If operand1 is still zero after arc_reorg, this is an orphaned loop +; instruction that was not at the start of the loop. +; There is no point is reloading this insn - then lp_count would still not +; be available for the loop end. +(define_insn "doloop_begin_i" + [(unspec:SI [(pc)] UNSPEC_LP) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (use (match_operand:SI 0 "register_operand" "l,l,????*X")) + (use (match_operand 1 "const_int_operand" "n,n,C_0")) + (use (match_operand 2 "const_int_operand" "n,n,X")) + (use (match_operand 3 "const_int_operand" "C_0,n,X")) + (use (match_operand 4 "const_int_operand" "C_0,X,X"))] + "" +{ + rtx scan; + int len, size = 0; + int n_insns = 0; + rtx loop_start = operands[4]; + + if (CONST_INT_P (loop_start)) + loop_start = NULL_RTX; + /* Size implications of the alignment will be taken care of by the + alignment inserted at the loop start. */ + if (LOOP_ALIGN (0) && INTVAL (operands[1])) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + if (!INTVAL (operands[1])) + return "; LITTLE LOST LOOP"; + if (loop_start && flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. */ + return "push_s r0\;add r0,pcl,%4-(.&-4)\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-(.&-4)\;sr r0,[3]; LP_END\;pop_s r0"; + } + /* Check if the loop end is in range to be set by the lp instruction. */ + size = INTVAL (operands[3]) < 2 ? 0 : 2048; + for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (recog_memoized (scan) == CODE_FOR_doloop_end_i + && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0) + == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0))) + break; + len = get_attr_length (scan); + size += len; + } + /* Try to verify that there are at least three instruction fetches + between the loop setup and the first encounter of the loop end. 
*/ + for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (GET_CODE (PATTERN (scan)) == SEQUENCE) + scan = XVECEXP (PATTERN (scan), 0, 0); + if (JUMP_P (scan)) + { + if (recog_memoized (scan) != CODE_FOR_doloop_end_i) + { + n_insns += 2; + if (simplejump_p (scan)) + { + scan = XEXP (SET_SRC (PATTERN (scan)), 0); + continue; + } + if (JUMP_LABEL (scan) + /* JUMP_LABEL might be simple_return instead if an insn. */ + && (!INSN_P (JUMP_LABEL (scan)) + || (!next_active_insn (JUMP_LABEL (scan)) + || (recog_memoized (next_active_insn (JUMP_LABEL (scan))) + != CODE_FOR_doloop_begin_i))) + && (!next_active_insn (NEXT_INSN (PREV_INSN (scan))) + || (recog_memoized + (next_active_insn (NEXT_INSN (PREV_INSN (scan)))) + != CODE_FOR_doloop_begin_i))) + n_insns++; + } + break; + } + len = get_attr_length (scan); + /* Size estimation of asms assumes that each line which is nonempty + codes an insn, and that each has a long immediate. For minimum insn + count, assume merely that a nonempty asm has at least one insn. */ + if (GET_CODE (PATTERN (scan)) == ASM_INPUT + || asm_noperands (PATTERN (scan)) >= 0) + n_insns += (len != 0); + else + n_insns += (len > 4 ? 2 : (len ? 1 : 0)); + } + if (LOOP_ALIGN (0)) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL); + if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start) + { + if (flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. */ + arc_clear_unalign (); + return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0"; + } + output_asm_insn ((size < 2048 + ? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"), + operands); + output_asm_insn (loop_start + ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START", + operands); + if (TARGET_ARC600 && n_insns < 1) + output_asm_insn ("nop", operands); + return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:"; + } + else if (TARGET_ARC600 && n_insns < 3) + { + /* At least four instructions are needed between the setting of LP_COUNT + and the loop end - but the lp instruction qualifies as one. */ + rtx prev = prev_nonnote_insn (insn); + + if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT)) + output_asm_insn ("nop", operands); + } + return "lp .L__GCC__LP%1"; +} + [(set_attr "type" "loop_setup") + (set_attr_alternative "length" +; FIXME: length is usually 4, but we need branch shortening +; to get this right. +; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4)) + [(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16)) + (if_then_else (match_test "flag_pic") (const_int 28) (const_int 16)) + (const_int 0)])] + ;; ??? we should really branch shorten this insn, but then we'd + ;; need a proper label first. N.B. the end label can not only go out + ;; of range when it is far away, but also when it precedes the loop - + ;; which, unfortunately, it sometimes does, when the loop "optimizer" + ;; messes things up. +) + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +; Use this for the ARC600 and ARC700. For ARCtangent-A5, this is unsafe +; without further checking for nearby branches etc., and without proper +; annotation of shift patterns that clobber lp_count +; ??? 
ARC600 might want to check if the loop has few iteration and only a +; single insn - loop setup is expensive then. +(define_expand "doloop_end" + [(use (match_operand 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_ARC600 || TARGET_ARC700" +{ + /* We could do smaller bivs with biv widening, and wider bivs by having + a high-word counter in an outer loop - but punt on this for now. */ + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_doloop_end_i (operands[0], operands[1], const0_rtx)); + DONE; +}) + +(define_insn_and_split "doloop_end_i" + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (use (reg:SI LP_START)) + (use (reg:SI LP_END)) + (use (match_operand 2 "const_int_operand" "n,???Cn0,???X")) + (clobber (match_scratch:SI 3 "=X,X,&????r"))] + "" + "* +{ + rtx prev = prev_nonnote_insn (insn); + + /* If there is an immediately preceding label, we must output a nop, + lest a branch to that label will fall out of the loop. + ??? We could try to avoid this by claiming to have a delay slot if there + is a preceding label, and outputting the delay slot insn instead, if + present. + Or we could have some optimization that changes the source edge to update + the loop count and jump to the loop start instead. */ + /* For ARC600, we must also prevent jumps inside the loop and jumps where + the loop counter value is live at the target from being directly at the + loop end. Being sure that the loop counter is dead at the target is + too much hair - we can't rely on data flow information at this point - + so insert a nop for all branches. + The ARC600 also can't read the loop counter in the last insn of a loop. */ + if (LABEL_P (prev)) + output_asm_insn (\"nop%?\", operands); + return \"\\n.L__GCC__LP%2: ; loop end, start is %1\"; +}" + "&& memory_operand (operands[0], SImode)" + [(pc)] +{ + emit_move_insn (operands[3], operands[0]); + emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0])); + DONE; +} + [(set_attr "type" "loop_end") + (set (attr "length") + (if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))") + (const_int 4) (const_int 0)))] +) + +; This pattern is generated by arc_reorg when there is no recognizable +; loop start. +(define_insn "*doloop_fallback" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 8 + ? \"brne.d %0,1,%1\;sub %0,%0,1\" + : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 8) (const_int 12))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +; reload can't make output reloads for jump insns, so we have to do this by hand. 
+(define_insn "doloop_fallback_m" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (set (match_operand:SI 2 "memory_operand" "=m") + (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 12 + ? \"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\" + : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 12) (const_int 16))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand 3 "immediate_operand" "")] + "" + "if (arc_expand_movmem (operands)) DONE; else FAIL;") + +;; Close http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 if this works +;; to the point that we can generate cmove instructions. +(define_expand "cbranch4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +(define_expand "cmp_float" + [(parallel [(set (match_operand 0 "") (match_operand 1 "")) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))])] + "" + "") + +(define_mode_iterator OPTFPE_CMP [CC_Z CC_FP_GT CC_FP_GE CC_FP_UNEQ CC_FP_ORD]) +(define_mode_attr cmp [(CC_Z "eq") (CC_FP_GT "gt") (CC_FP_GE "ge") + (CC_FP_UNEQ "uneq") (CC_FP_ORD "ord")]) + +(define_insn "*cmpsf_" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:SF 0) (reg:SF 1))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_SPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__sf2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +;; N.B. for "*cmpdf_ord": +;; double precision fpx sets bit 31 for NaNs. We need bit 51 set +;; for the floating point emulation to recognize the NaN. +(define_insn "*cmpdf_" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:DF 0) (reg:DF 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_DPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__df2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w") + (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))] + "" + "bclr%? %0,%1,31%&" + [(set_attr "type" "unary") + (set_attr "iscompact" "maybe,false,false") + (set_attr "length" "2,4,4") + (set_attr "predicable" "no,yes,no")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w") + (neg:SF (match_operand:SF 1 "register_operand" "0,c")))] + "" + "bxor%? 
%0,%1,31" + [(set_attr "type" "unary") + (set_attr "predicable" "yes,no")]) + +;; ??? Should this use arc_output_libcall and set is_sfunc? +(define_insn "*millicode_thunk_st" + [(match_parallel 0 "millicode_store_operation" + [(set (mem:SI (reg:SI SP_REG)) (reg:SI 13))])] + "" +{ + output_asm_insn ("bl%* __st_r13_to_%0", + &SET_SRC (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 2))); + return ""; +} + [(set_attr "type" "call")]) + +(define_insn "*millicode_thunk_ld" + [(match_parallel 0 "millicode_load_clob_operation" + [(set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])] + "" +{ + output_asm_insn ("bl%* __ld_r13_to_%0", + &SET_DEST (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 2))); + return ""; +} + [(set_attr "type" "call")]) + +; the sibthunk restores blink, so we use the return rtx. +(define_insn "*millicode_sibthunk_ld" + [(match_parallel 0 "millicode_load_operation" + [(return) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (reg:SI 12))) + (set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])] + "" +{ + output_asm_insn ("b%* __ld_r13_to_%0_ret", + &SET_DEST (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 1))); + return ""; +} + [(set_attr "type" "call") + (set_attr "is_SIBCALL" "yes")]) + +;; If hardware floating point is available, don't define a negdf pattern; +;; it would be something like: +;;(define_insn "negdf2" +;; [(set (match_operand:DF 0 "register_operand" "=w,w,D,?r") +;; (neg:DF (match_operand:DF 1 "register_operand" "0,c,D,D"))) +;; (clobber (match_scratch:DF 2 "=X,X,X,X,D1"))] +;; "" +;; "@ +;; bxor%? %H0,%H1,31 +;; bxor %H0,%H1,31 ` mov %L0,%L1 +;; drsubh%F0%F1 0,0,0 +;; drsubh%F2%F1 %H0,0,0 ` dexcl%F2 %L0,%H0,%L0" +;; [(set_attr "type" "unary,unary,dpfp_addsub,dpfp_addsub") +;; (set_attr "iscompact" "false,false,false,false") +;; (set_attr "length" "4,4,8,12") +;; (set_attr "cond" "canuse,nocond,nocond,nocond")]) +;; and this suffers from always requiring a long immediate when using +;; the floating point hardware. +;; We then want the sub[sd]f patterns to be used, so that we can load the +;; constant zero efficiently into a register when we want to do the +;; computation using the floating point hardware. There should be a special +;; subdf alternative that matches a zero operand 1, which then can allow +;; to use bxor to flip the high bit of an integer register. +;; ??? we actually can't use the floating point hardware for neg, because +;; this would not work right for -0. OTOH optabs.c has already code +;; to synthesyze negate by flipping the sign bit. + + +;; include the arc-FPX instructions +(include "fpx.md") + +(include "simdext.md") diff --git a/gcc-4.9/gcc/config/arc/arc.opt b/gcc-4.9/gcc/config/arc/arc.opt new file mode 100644 index 000000000..2deb9e77e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.opt @@ -0,0 +1,390 @@ +; Options for the Synopsys DesignWare ARC port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. 
If not see +; <http://www.gnu.org/licenses/>. + +HeaderInclude +config/arc/arc-opts.h + +mbig-endian +Target Report RejectNegative Mask(BIG_ENDIAN) +Compile code for big endian mode + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_ENDIAN) +Compile code for little endian mode. This is the default + +mno-cond-exec +Target Report RejectNegative Mask(NO_COND_EXEC) +Disable ARCompact specific pass to generate conditional execution instructions + +mA5 +Target Report +Generate ARCompact 32-bit code for ARCtangent-A5 processor + +mA6 +Target Report +Generate ARCompact 32-bit code for ARC600 processor + +mARC600 +Target Report +Same as -mA6 + +mARC601 +Target Report +Generate ARCompact 32-bit code for ARC601 processor + +mA7 +Target Report +Generate ARCompact 32-bit code for ARC700 processor + +mARC700 +Target Report +Same as -mA7 + +mmixed-code +Target Report Mask(MIXED_CODE_SET) +Tweak register allocation to help 16-bit instruction generation +; originally this was: +;Generate ARCompact 16-bit instructions intermixed with 32-bit instructions for ARCtangent-A5 and higher processors +; but we do that without -mmixed-code, too, it's just a different instruction +; count / size tradeoff. + +; We use an explicit definition for the negative form because that is the +; actually interesting option, and we want that to have its own comment. +mvolatile-cache +Target Report RejectNegative Mask(VOLATILE_CACHE_SET) +Use ordinarily cached memory accesses for volatile references + +mno-volatile-cache +Target Report RejectNegative InverseMask(VOLATILE_CACHE_SET) +Enable cache bypass for volatile references + +mbarrel-shifter +Target Report Mask(BARREL_SHIFTER) +Generate instructions supported by barrel shifter + +mnorm +Target Report Mask(NORM_SET) +Generate norm instruction + +mswap +Target Report Mask(SWAP_SET) +Generate swap instruction + +mmul64 +Target Report Mask(MUL64_SET) +Generate mul64 and mulu64 instructions + +mno-mpy +Target Report Mask(NOMPY_SET) +Do not generate mpy instructions for ARC700 + +mea +Target Report Mask(EA_SET) +Generate Extended arithmetic instructions. Currently only divaw, adds, subs and sat16 are supported + +msoft-float +Target Report Mask(0) +Dummy flag. This is the default unless FPX switches are provided explicitly + +mlong-calls +Target Report Mask(LONG_CALLS_SET) +Generate call insns as register indirect calls + +mno-brcc +Target Report Mask(NO_BRCC_SET) +Do not generate BRcc instructions in arc_reorg. + +msdata +Target Report InverseMask(NO_SDATA_SET) +Generate sdata references. This is the default, unless you compile for PIC. + +mno-millicode +Target Report Mask(NO_MILLICODE_THUNK_SET) +Do not generate millicode thunks (needed only with -Os) + +mspfp +Target Report Mask(SPFP_COMPACT_SET) +FPX: Generate Single Precision FPX (compact) instructions. + +mspfp-compact +Target Report Mask(SPFP_COMPACT_SET) MaskExists +FPX: Generate Single Precision FPX (compact) instructions. + +mspfp-fast +Target Report Mask(SPFP_FAST_SET) +FPX: Generate Single Precision FPX (fast) instructions. + +margonaut +Target Report Mask(ARGONAUT_SET) +FPX: Enable Argonaut ARC CPU Double Precision Floating Point extensions. + +mdpfp +Target Report Mask(DPFP_COMPACT_SET) +FPX: Generate Double Precision FPX (compact) instructions. + +mdpfp-compact +Target Report Mask(DPFP_COMPACT_SET) MaskExists +FPX: Generate Double Precision FPX (compact) instructions. + +mdpfp-fast +Target Report Mask(DPFP_FAST_SET) +FPX: Generate Double Precision FPX (fast) instructions.
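A minimal C sketch, assuming the standard GCC option machinery, of what the Mask() entries above turn into: each Mask(FOO) becomes a bit in target_flags plus a generated TARGET_FOO test macro, and the TARGET_SPFP / TARGET_DPFP conditions used later in fpx.md are presumably the union of the fast and compact variants. The bit numbers and macro bodies below are illustrative, not the generated ones.

/* Illustrative sketch only; real bits and macros come from the generated
   options.h and from arc.h.  */
#include <stdio.h>

static unsigned int target_flags;

#define MASK_SPFP_COMPACT_SET (1u << 0)   /* hypothetical bit assignments */
#define MASK_SPFP_FAST_SET    (1u << 1)
#define MASK_DPFP_COMPACT_SET (1u << 2)
#define MASK_DPFP_FAST_SET    (1u << 3)

#define TARGET_SPFP_COMPACT_SET ((target_flags & MASK_SPFP_COMPACT_SET) != 0)
#define TARGET_SPFP_FAST_SET    ((target_flags & MASK_SPFP_FAST_SET) != 0)
#define TARGET_DPFP_COMPACT_SET ((target_flags & MASK_DPFP_COMPACT_SET) != 0)
#define TARGET_DPFP_FAST_SET    ((target_flags & MASK_DPFP_FAST_SET) != 0)

/* fpx.md tests TARGET_SPFP / TARGET_DPFP; presumably defined as the union
   of the fast and compact sets, roughly:  */
#define TARGET_SPFP (TARGET_SPFP_FAST_SET || TARGET_SPFP_COMPACT_SET)
#define TARGET_DPFP (TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET)

int main (void)
{
  target_flags |= MASK_DPFP_FAST_SET;   /* as if -mdpfp-fast had been given */
  printf ("TARGET_DPFP=%d TARGET_SPFP=%d\n", TARGET_DPFP, TARGET_SPFP);
  return 0;
}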
+ +mno-dpfp-lrsr +Target Report Mask(DPFP_DISABLE_LRSR) +Disable LR and SR instructions from using FPX extension aux registers. + +msimd +Target Report Mask(SIMD_SET) +Enable generation of ARC SIMD instructions via target-specific builtins. + +mcpu= +Target RejectNegative Joined Var(arc_cpu) Enum(processor_type) Init(PROCESSOR_NONE) +-mcpu=CPU Compile code for ARC variant CPU + +Enum +Name(processor_type) Type(enum processor_type) + +EnumValue +Enum(processor_type) String(A5) Value(PROCESSOR_A5) + +EnumValue +Enum(processor_type) String(ARC600) Value(PROCESSOR_ARC600) + +EnumValue +Enum(processor_type) String(ARC601) Value(PROCESSOR_ARC601) + +EnumValue +Enum(processor_type) String(ARC700) Value(PROCESSOR_ARC700) + +msize-level= +Target RejectNegative Joined UInteger Var(arc_size_opt_level) Init(-1) +size optimization level: 0:none 1:opportunistic 2: regalloc 3:drop align, -Os + +misize +Target Report PchIgnore Var(TARGET_DUMPISIZE) +Annotate assembler instructions with estimated addresses + +mmultcost= +Target RejectNegative Joined UInteger Var(arc_multcost) Init(-1) +Cost to assume for a multiply instruction, with 4 being equal to a normal insn. + +mtune=ARC600 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC600 cpu. + +mtune=ARC601 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC601 cpu. + +mtune=ARC700 +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_STD) +Tune for ARC700 R4.2 Cpu with standard multiplier block. + +mtune=ARC700-xmac +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC725D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC750D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mindexed-loads +Target Var(TARGET_INDEXED_LOADS) +Enable the use of indexed loads + +mauto-modify-reg +Target Var(TARGET_AUTO_MODIFY_REG) +Enable the use of pre/post modify with register displacement. + +mmul32x16 +Target Report Mask(MULMAC_32BY16_SET) +Generate 32x16 multiply and mac instructions + +; the initializer is supposed to be: Init(REG_BR_PROB_BASE/2) , +; alas, basic-block.h is not included in options.c . +munalign-prob-threshold= +Target RejectNegative Joined UInteger Var(arc_unalign_prob_threshold) Init(10000/2) +Set probability threshold for unaligning branches + +mmedium-calls +Target Var(TARGET_MEDIUM_CALLS) Init(TARGET_MMEDIUM_CALLS_DEFAULT) +Don't use less than 25 bit addressing range for calls. + +mannotate-align +Target Var(TARGET_ANNOTATE_ALIGN) +Explain what alignment considerations lead to the decision to make an insn short or long. + +malign-call +Target Var(TARGET_ALIGN_CALL) +Do alignment optimizations for call instructions. + +mRcq +Target Var(TARGET_Rcq) +Enable Rcq constraint handling - most short code generation depends on this. + +mRcw +Target Var(TARGET_Rcw) +Enable Rcw constraint handling - ccfsm condexec mostly depends on this. + +mearly-cbranchsi +Target Var(TARGET_EARLY_CBRANCHSI) +Enable pre-reload use of cbranchsi pattern + +mbbit-peephole +Target Var(TARGET_BBIT_PEEPHOLE) +Enable bbit peephole2 + +mcase-vector-pcrel +Target Var(TARGET_CASE_VECTOR_PC_RELATIVE) +Use pc-relative switch case tables - this enables case table shortening. + +mcompact-casesi +Target Var(TARGET_COMPACT_CASESI) +Enable compact casesi pattern + +mq-class +Target Var(TARGET_Q_CLASS) +Enable 'q' instruction alternatives. 
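The mtune= entries above all write to the single variable named in their Var() clauses, arc_tune, each storing its own TUNE_* value. A small self-contained sketch of that selector behaviour; the enum values mirror the Var() arguments, while the handler function and the rest of the scaffolding are invented for illustration.

#include <stdio.h>
#include <string.h>

enum arc_tune_t {
  TUNE_NONE,
  TUNE_ARC600,
  TUNE_ARC700_4_2_STD,
  TUNE_ARC700_4_2_XMAC
};

static enum arc_tune_t arc_tune = TUNE_NONE;

/* What the option table above amounts to for -mtune=<cpu>.  */
static void handle_mtune (const char *cpu)
{
  if (!strcmp (cpu, "ARC600") || !strcmp (cpu, "ARC601"))
    arc_tune = TUNE_ARC600;
  else if (!strcmp (cpu, "ARC700"))
    arc_tune = TUNE_ARC700_4_2_STD;
  else if (!strcmp (cpu, "ARC700-xmac")
           || !strcmp (cpu, "ARC725D") || !strcmp (cpu, "ARC750D"))
    arc_tune = TUNE_ARC700_4_2_XMAC;
}

int main (void)
{
  handle_mtune ("ARC725D");
  printf ("arc_tune = %d\n", (int) arc_tune);
  return 0;
}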
+ +mexpand-adddi +Target Var(TARGET_EXPAND_ADDDI) +Expand adddi3 and subdi3 at rtl generation time into add.f / adc etc. + + +; Flags used by the assembler, but for which we define preprocessor +; macro symbols as well. +mcrc +Target Report +Enable variable polynomial CRC extension + +mdsp-packa +Target Report +Enable DSP 3.1 Pack A extensions + +mdvbf +Target Report +Enable dual viterbi butterfly extension + +mmac-d16 +Target Report Undocumented + +mmac-24 +Target Report Undocumented + +mtelephony +Target Report RejectNegative +Enable Dual and Single Operand Instructions for Telephony + +mxy +Target Report +Enable XY Memory extension (DSP version 3) + +; ARC700 4.10 extension instructions +mlock +Target Report +Enable Locked Load/Store Conditional extension + +mswape +Target Report +Enable swap byte ordering extension instruction + +mrtsc +Target Report +Enable 64-bit Time-Stamp Counter extension instruction + +mno-epilogue-cfi +Target Report RejectNegative InverseMask(EPILOGUE_CFI) +Disable generation of cfi for epilogues. + +mepilogue-cfi +Target RejectNegative Mask(EPILOGUE_CFI) +Enable generation of cfi for epilogues. + +EB +Target +Pass -EB option through to linker. + +EL +Target +Pass -EL option through to linker. + +marclinux +target +Pass -marclinux option through to linker. + +marclinux_prof +target +Pass -marclinux_prof option through to linker. + +;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra. +;Target InverseMask(NO_LRA) +mlra +; lra still won't allow to configure libgcc; see PR rtl-optimization/55464. +; so don't enable by default. +Target Mask(LRA) +Enable lra + +mlra-priority-none +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONE) +Don't indicate any priority with TARGET_REGISTER_PRIORITY + +mlra-priority-compact +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_COMPACT) +Indicate priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY + +mlra-priority-noncompact +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONCOMPACT) +Reduce priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY + +mucb-mcount +Target Report Var(TARGET_UCB_MCOUNT) +instrument with mcount calls as in the ucb code + +; backward-compatibility aliases, translated by DRIVER_SELF_SPECS + +mEA +Target + +multcost= +Target RejectNegative Joined + +; Unfortunately, listing the full option name gives us clashes +; with OPT_opt_name being claimed for both opt_name and opt-name, +; so we leave out the last character or more. +mbarrel_shifte +Target Joined + +mspfp_ +Target Joined + +mdpfp_ +Target Joined + +mdsp_pack +Target Joined + +mmac_ +Target Joined + diff --git a/gcc-4.9/gcc/config/arc/arc600.md b/gcc-4.9/gcc/config/arc/arc600.md new file mode 100644 index 000000000..8255e244d --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc600.md @@ -0,0 +1,63 @@ +;; DFA scheduling description of the Synopsys DesignWare ARC600 cpu +;; for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributor: Joern Rennecke <joern.rennecke@embecosm.com> +;; on behalf of Synopsys Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "ARC600") + +(define_cpu_unit "issue_600" "ARC600") +(define_cpu_unit "mul64_600" "ARC600") + +; latency from flag-setting insns to branches is 3. +(define_insn_reservation "compare_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "compare")) + "issue_600") + +(define_insn_reservation "load_DI_600" 4 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "load") + (match_operand:DI 0 "" "")) + "issue_600") + +(define_insn_reservation "load_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "load") + (not (match_operand:DI 0 "" ""))) + "issue_600") + +(define_insn_reservation "mul_600_fast" 3 + (and (eq_attr "tune" "arc600") + (match_test "arc_multcost < COSTS_N_INSNS (7)") + (eq_attr "type" "multi,umulti")) + "mul64_600*3") + +(define_insn_reservation "mul_600_slow" 8 + (and (eq_attr "tune" "arc600") + (match_test "arc_multcost >= COSTS_N_INSNS (7)") + (eq_attr "type" "multi,umulti")) + "mul64_600*8") + +(define_insn_reservation "mul_mac_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "mulmac_600")) + "nothing*3") + +(define_bypass 1 "mul_mac_600" "mul_mac_600") diff --git a/gcc-4.9/gcc/config/arc/arc700.md b/gcc-4.9/gcc/config/arc/arc700.md new file mode 100644 index 000000000..8e80b4f7c --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc700.md @@ -0,0 +1,170 @@ +;; DFA scheduling description of the Synopsys DesignWare ARC700 cpu +;; for GNU C compiler +;; Comments and Support For ARC700 instructions added by +;; Saurabh Verma (saurabh.verma@codito.com) +;; Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) +;; Factoring out and improvement of ARC700 Scheduling by +;; Joern Rennecke (joern.rennecke@embecosm.com) +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
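The mul_600_fast / mul_600_slow reservations in arc600.md above key arc_multcost (set by -mmultcost=, where a cost of 4 equals one normal insn) against COSTS_N_INSNS (7). A standalone sketch of that threshold, using a local stand-in for GCC's cost scaling macro.

#include <stdio.h>

/* Local stand-in for GCC's cost scaling macro.  */
#define COSTS_N_INSNS(N) ((N) * 4)

/* Mirrors the match_test in mul_600_fast / mul_600_slow above:
   arc_multcost < COSTS_N_INSNS (7) selects the 3-cycle reservation,
   anything else the 8-cycle one.  */
static int arc600_mul_latency (int arc_multcost)
{
  return arc_multcost < COSTS_N_INSNS (7) ? 3 : 8;
}

int main (void)
{
  printf ("-mmultcost=%d -> %d cycles\n", COSTS_N_INSNS (3),
          arc600_mul_latency (COSTS_N_INSNS (3)));
  printf ("-mmultcost=%d -> %d cycles\n", COSTS_N_INSNS (9),
          arc600_mul_latency (COSTS_N_INSNS (9)));
  return 0;
}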
+ +(define_automaton "ARC700") + +;; aux to be added here +(define_cpu_unit "core, dmp, write_port, dmp_write_port, multiplier, issue, blockage, simd_unit" "ARC700") + +(define_insn_reservation "core_insn_DI" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, cmove, binary") + (match_operand:DI 0 "" "")) + "issue+core, issue+core+write_port, write_port") + +(define_insn_reservation "lr" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "lr")) + "issue+blockage, blockage*2, write_port") + +(define_insn_reservation "sr" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "sr")) + "issue+dmp_write_port+blockage, blockage*9") + +(define_insn_reservation "core_insn" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, binary")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cmove" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cmove")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cc_arith" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cc_arith")) + "issue+core, nothing, write_port") + +(define_insn_reservation "two_cycle_core_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "two_cycle_core")) + "issue+core, nothing, write_port") + +(define_insn_reservation "divaw_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "divaw")) + "issue+core, nothing, write_port") + +(define_insn_reservation "shift_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "shift")) + "issue+core, nothing, write_port") + +; Latency from flag setters to arithmetic with carry is 3. +(define_insn_reservation "compare_700" 3 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; Assume here the branch is predicted correctly and has a delay slot insn +; or is properly unaligned. +(define_insn_reservation "branch_700" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; TODOs: is this correct ?? 
+(define_insn_reservation "multi_DI" 10 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "multi") + (match_operand:DI 0 "" "")) + "issue+multiplier, multiplier*2,issue+multiplier, multiplier*2, + nothing,write_port,nothing*2, write_port") + +(define_insn_reservation "umulti_DI" 9 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti") + (match_operand:DI 0 "" "")) + "issue+multiplier, multiplier,issue+multiplier, multiplier*2, + write_port,nothing*3, write_port") + +(define_insn_reservation "umulti_xmac" 5 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti")) + "issue+multiplier, multiplier, nothing*3, write_port") + +; latency of mpyu is lower than mpy / mpyh / mpyhu +(define_insn_reservation "umulti_std" 6 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti")) + "issue+multiplier, multiplier*3, nothing*2, write_port") + +;; arc700 xmac multiplier +(define_insn_reservation "multi_xmac" 5 + (and (eq_attr "tune" "arc700_4_2_xmac") + (eq_attr "type" "multi")) + "issue+multiplier,multiplier,nothing*3,write_port") + +; arc700 standard multiplier +(define_insn_reservation "multi_std" 7 + (and (eq_attr "tune" "arc700_4_2_std") + (eq_attr "type" "multi")) + "issue+multiplier,multiplier*4,nothing*2,write_port") + +;(define_insn_reservation "multi_SI" 7 +; (eq_attr "type" "multi") +; "issue+multiplier, multiplier*2, nothing*4, write_port") + +; There is no multiplier -> multiplier bypass except for the +; mac -> mac dependency on the accumulator. + +; divaw -> divaw latency is 1 cycle +(define_bypass 1 "divaw_insn" "divaw_insn") + +(define_bypass 1 "compare_700" "branch_700,core_insn,data_store,data_load") + +; we could shedule the cmove immediately after the compare, but then +; the cmove would have higher latency... so just keep the cmove apart +; from the compare. +(define_bypass 2 "compare_700" "cmove") + +; no functional unit runs when blockage is reserved +(exclusion_set "blockage" "core, multiplier") + +(define_insn_reservation "data_load_DI" 4 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "load") + (match_operand:DI 0 "" "")) + "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port") + +(define_insn_reservation "data_load" 3 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "load") + (not (match_operand:DI 0 "" ""))) + "issue+dmp, nothing, dmp_write_port") + +(define_insn_reservation "data_store_DI" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "store") + (match_operand:DI 0 "" "")) + "issue+dmp_write_port, issue+dmp_write_port") + +(define_insn_reservation "data_store" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "store") + (not (match_operand:DI 0 "" ""))) + "issue+dmp_write_port") diff --git a/gcc-4.9/gcc/config/arc/constraints.md b/gcc-4.9/gcc/config/arc/constraints.md new file mode 100644 index 000000000..d01e156c6 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/constraints.md @@ -0,0 +1,399 @@ +;; Constraint definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints + +; Most instructions accept arbitrary core registers for their inputs, even +; if the core register in question cannot be written to, like the multiply +; result registers of the ARCtangent-A5 and ARC600 . +; First, define a class for core registers that can be read cheaply. This +; is most or all core registers for ARC600, but only r0-r31 for ARC700 +(define_register_constraint "c" "CHEAP_CORE_REGS" + "core register @code{r0}-@code{r31}, @code{ap},@code{pcl}") + +; All core regs - e.g. for when we must have a way to reload a register. +(define_register_constraint "Rac" "ALL_CORE_REGS" + "core register @code{r0}-@code{r60}, @code{ap},@code{pcl}") + +; Some core registers (.e.g lp_count) aren't general registers because they +; can't be used as the destination of a multi-cycle operation like +; load and/or multiply, yet they are still writable in the sense that +; register-register moves and single-cycle arithmetic (e.g "add", "and", +; but not "mpy") can write to them. +(define_register_constraint "w" "WRITABLE_CORE_REGS" + "writable core register: @code{r0}-@code{r31}, @code{r60}, nonfixed core register") + +(define_register_constraint "W" "MPY_WRITABLE_CORE_REGS" + "writable core register except @code{LP_COUNT} (@code{r60}): @code{r0}-@code{r31}, nonfixed core register") + +(define_register_constraint "l" "LPCOUNT_REG" + "@internal + Loop count register @code{r60}") + +(define_register_constraint "x" "R0_REGS" + "@code{R0} register.") + +(define_register_constraint "Rgp" "GP_REG" + "@internal + Global Pointer register @code{r26}") + +(define_register_constraint "f" "FP_REG" + "@internal + Frame Pointer register @code{r27}") + +(define_register_constraint "b" "SP_REGS" + "@internal + Stack Pointer register @code{r28}") + +(define_register_constraint "k" "LINK_REGS" + "@internal + Link Registers @code{ilink1}:@code{r29}, @code{ilink2}:@code{r30}, + @code{blink}:@code{r31},") + +(define_register_constraint "q" "ARCOMPACT16_REGS" + "Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}-@code{r15}") + +(define_register_constraint "e" "AC16_BASE_REGS" + "Registers usable as base-regs of memory addresses in ARCompact 16-bit memory + instructions: @code{r0}-@code{r3}, @code{r12}-@code{r15}, @code{sp}") + +(define_register_constraint "D" "DOUBLE_REGS" + "ARC FPX (dpfp) 64-bit registers. @code{D0}, @code{D1}") + +(define_register_constraint "d" "SIMD_DMA_CONFIG_REGS" + "@internal + ARC SIMD DMA configuration registers @code{di0}-@code{di7}, + @code{do0}-@code{do7}") + +(define_register_constraint "v" "SIMD_VR_REGS" + "ARC SIMD 128-bit registers @code{VR0}-@code{VR23}") + +; We could allow call-saved registers for sibling calls if we restored them +; in the delay slot of the call. However, that would not allow to adjust the +; stack pointer afterwards, so the call-saved register would have to be +; restored from a call-used register that was just loaded with the value +; before. So sticking to call-used registers for sibcalls will likely +; generate better code overall. +(define_register_constraint "Rsc" "SIBCALL_REGS" + "@internal + Sibling call register") + +;; Integer constraints + +(define_constraint "I" + "@internal + A signed 12-bit integer constant." 
+ (and (match_code "const_int") + (match_test "SIGNED_INT12 (ival)"))) + +(define_constraint "K" + "@internal + A 3-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT3 (ival)"))) + +(define_constraint "L" + "@internal + A 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (ival)"))) + +(define_constraint "CnL" + "@internal + One's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (~ival)"))) + +(define_constraint "CmL" + "@internal + Two's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (-ival)"))) + +(define_constraint "M" + "@internal + A 5-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT5 (ival)"))) + +(define_constraint "N" + "@internal + Integer constant 1" + (and (match_code "const_int") + (match_test "IS_ONE (ival)"))) + +(define_constraint "O" + "@internal + A 7-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT7 (ival)"))) + +(define_constraint "P" + "@internal + An 8-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT8 (ival)"))) + +(define_constraint "C_0" + "@internal + Zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "Cn0" + "@internal + Negative or zero" + (and (match_code "const_int") + (match_test "ival <= 0"))) + +(define_constraint "Cca" + "@internal + Conditional or three-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x1f8 && ival <= 0x1f8 + && ((ival >= 0 ? ival : -ival) + <= 0x3f * (ival & -ival)))"))) + +; intersection of "O" and "Cca". +(define_constraint "CL2" + "@internal + A 6-bit unsigned integer constant times 2" + (and (match_code "const_int") + (match_test "!(ival & ~126)"))) + +(define_constraint "CM4" + "@internal + A 5-bit unsigned integer constant times 4" + (and (match_code "const_int") + (match_test "!(ival & ~124)"))) + +(define_constraint "Csp" + "@internal + A valid stack pointer offset for a short add" + (and (match_code "const_int") + (match_test "!(ival & ~124) || !(-ival & ~124)"))) + +(define_constraint "C2a" + "@internal + Unconditional two-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x4000 && ival <= 0x4000 + && ((ival >= 0 ? 
ival : -ival) + <= 0x7ff * (ival & -ival)))"))) + +(define_constraint "C0p" + "@internal + power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (ival)"))) + +(define_constraint "C1p" + "@internal + constant such that x+1 is a power of two, and x != 0" + (and (match_code "const_int") + (match_test "ival && IS_POWEROF2_P (ival + 1)"))) + +(define_constraint "Ccp" + "@internal + constant such that ~x (one's Complement) is a power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (~ival)"))) + +(define_constraint "Cux" + "@internal + constant such that AND gives an unsigned extension" + (and (match_code "const_int") + (match_test "ival == 0xff || ival == 0xffff"))) + +(define_constraint "Crr" + "@internal + constant that can be loaded with ror b,u6" + (and (match_code "const_int") + (match_test "(ival & ~0x8000001f) == 0 && !arc_ccfsm_cond_exec_p ()"))) + +;; Floating-point constraints + +(define_constraint "G" + "@internal + A 32-bit constant double value" + (and (match_code "const_double") + (match_test "arc_double_limm_p (op)"))) + +(define_constraint "H" + "@internal + All const_double values (including 64-bit values)" + (and (match_code "const_double") + (match_test "1"))) + +;; Memory constraints +(define_memory_constraint "T" + "@internal + A valid memory operand for ARCompact load instructions" + (and (match_code "mem") + (match_test "compact_load_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "S" + "@internal + A valid memory operand for ARCompact store instructions" + (and (match_code "mem") + (match_test "compact_store_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usd" + "@internal + A valid _small-data_ memory operand for ARCompact instructions" + (and (match_code "mem") + (match_test "compact_sda_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usc" + "@internal + A valid memory operand for storing constants" + (and (match_code "mem") + (match_test "!CONSTANT_P (XEXP (op,0))") +;; ??? the assembler rejects stores of immediates to small data. 
+ (match_test "!compact_sda_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Us<" + "@internal + Stack pre-decrement" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG"))) + +(define_memory_constraint "Us>" + "@internal + Stack post-increment" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == POST_INC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG"))) + +;; General constraints + +(define_constraint "Cbr" + "Branch destination" + (ior (and (match_code "symbol_ref") + (match_test "!arc_is_longcall_p (op)")) + (match_code "label_ref"))) + +(define_constraint "Cbp" + "predicable branch/call destination" + (ior (and (match_code "symbol_ref") + (match_test "arc_is_shortcall_p (op)")) + (match_code "label_ref"))) + +(define_constraint "Cpc" + "pc-relative constant" + (match_test "arc_legitimate_pc_offset_p (op)")) + +(define_constraint "Clb" + "label" + (and (match_code "label_ref") + (match_test "arc_text_label (XEXP (op, 0))"))) + +(define_constraint "Cal" + "constant for arithmetic/logical operations" + (match_test "immediate_operand (op, VOIDmode) && !arc_legitimate_pc_offset_p (op)")) + +(define_constraint "C32" + "32 bit constant for arithmetic/logical operations" + (match_test "immediate_operand (op, VOIDmode) + && !arc_legitimate_pc_offset_p (op) + && !satisfies_constraint_I (op)")) + +; Note that the 'cryptic' register constraints will not make reload use the +; associated class to reload into, but this will not penalize reloading of any +; other operands, or using an alternate part of the same alternative. + +; Rcq is different in three important ways from a register class constraint: +; - It does not imply a register class, hence reload will not use it to drive +; reloads. +; - It matches even when there is no register class to describe its accepted +; set; not having such a set again lessens the impact on register allocation. +; - It won't match when the instruction is conditionalized by the ccfsm. +(define_constraint "Rcq" + "@internal + Cryptic q - for short insn generation while not affecting register allocation + Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}-@code{r15}" + (and (match_code "REG") + (match_test "TARGET_Rcq + && !arc_ccfsm_cond_exec_p () + && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) + +; If we need a reload, we generally want to steer reload to use three-address +; alternatives in preference of two-address alternatives, unless the +; three-address alternative introduces a LIMM that is unnecessary for the +; two-address alternative. 
+(define_constraint "Rcw" + "@internal + Cryptic w - for use in early alternatives with matching constraint" + (and (match_code "REG") + (match_test + "TARGET_Rcw + && REGNO (op) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], + REGNO (op))"))) + +(define_constraint "Rcr" + "@internal + Cryptic r - for use in early alternatives with matching constraint" + (and (match_code "REG") + (match_test + "TARGET_Rcw + && REGNO (op) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], + REGNO (op))"))) + +(define_constraint "Rcb" + "@internal + Stack Pointer register @code{r28} - do not reload into its class" + (and (match_code "REG") + (match_test "REGNO (op) == 28"))) + +(define_constraint "Rck" + "@internal + blink (usful for push_s / pop_s)" + (and (match_code "REG") + (match_test "REGNO (op) == 31"))) + +(define_constraint "Rs5" + "@internal + sibcall register - only allow one of the five available 16 bit isnsn. + Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}" + (and (match_code "REG") + (match_test "!arc_ccfsm_cond_exec_p ()") + (ior (match_test "(unsigned) REGNO (op) <= 3") + (match_test "REGNO (op) == 12")))) + +(define_constraint "Rcc" + "@internal + Condition Codes" + (and (match_code "REG") (match_test "cc_register (op, VOIDmode)"))) + + +(define_constraint "Q" + "@internal + Integer constant zero" + (and (match_code "const_int") + (match_test "IS_ZERO (ival)"))) diff --git a/gcc-4.9/gcc/config/arc/fpx.md b/gcc-4.9/gcc/config/arc/fpx.md new file mode 100644 index 000000000..4eee6aff9 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/fpx.md @@ -0,0 +1,674 @@ +;; Machine description of the Synopsys DesignWare ARC cpu Floating Point +;; extensions for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TODOs: +;; dpfp blocks? 
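The Rcq constraint in constraints.md above admits the ARCompact 16-bit register set through the test IN_RANGE (REGNO (op) ^ 4, 4, 11). A standalone sketch, with a simplified local IN_RANGE, showing that the XOR trick selects exactly r0-r3 and r12-r15.

#include <stdio.h>

/* Simplified local version of GCC's IN_RANGE.  */
#define IN_RANGE(VALUE, LOWER, UPPER) \
  ((VALUE) >= (LOWER) && (VALUE) <= (UPPER))

int main (void)
{
  /* r0-r3 map to 4-7 and r12-r15 map to 8-11 under XOR with 4;
     every other core register number falls outside [4, 11].  */
  for (int regno = 0; regno < 32; regno++)
    if (IN_RANGE (regno ^ 4, 4, 11))
      printf ("r%d is accepted by Rcq\n", regno);
  return 0;
}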
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Scheduler descriptions for the fpx instructions +(define_insn_reservation "spfp_compact" 3 + (and (match_test "TARGET_SPFP_COMPACT_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*2, write_port") + +(define_insn_reservation "spfp_fast" 6 + (and (match_test "TARGET_SPFP_FAST_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*5, write_port") + +(define_insn_reservation "dpfp_compact_mult" 7 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_mult")) + "issue+core, nothing*6, write_port") + +(define_insn_reservation "dpfp_compact_addsub" 5 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_addsub")) + "issue+core, nothing*4, write_port") + +(define_insn_reservation "dpfp_fast" 5 + (and (match_test "TARGET_DPFP_FAST_SET") + (eq_attr "type" "dpfp_mult,dpfp_addsub")) + "issue+core, nothing*4, write_port") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (plus:SF (match_operand:SF 1 "nonmemory_operand" "0,r,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "I,rL,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fadd %0,%1,%2 + fadd %0,%1,%2 + fadd %0,%S1,%2 + fadd %0,%1,%S2 + fadd%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (minus:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] + ;"(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fsub %0,%1,%2 + fsub %0,%1,%2 + fsub %0,%S1,%2 + fsub %0,%1,%S2 + fsub%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (mult:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET" ;Add flag for float + "TARGET_SPFP" + "@ + fmul %0,%1,%2 + fmul %0,%1,%2 + fmul %0,%S1,%2 + fmul %0,%1,%S2 + fmul%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + + +;; For comparisons, we can avoid storing the top half of the result into +;; a register since '.f' lets us set the Z bit for the conditional +;; branch insns. + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "cmpsfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:SF 0 "register_operand" "r") + (match_operand:SF 1 "register_operand" "r")))] + "TARGET_ARGONAUT_SET && TARGET_SPFP" + "fsub.f 0,%0,%1" + [(set_attr "type" "spfp") + (set_attr "length" "4")]) + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +;; ??? FIXME we claim to clobber operand 2, yet the two numbers appended +;; to the actual instructions are incorrect. The result of the d*subh +;; insn is stored in the Dx register specified by that first number. 
+(define_insn "cmpdfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:DF 0 "nonmemory_operand" "D,r") + (match_operand:DF 1 "nonmemory_operand" "r,D"))) + (clobber (match_scratch:DF 2 "=D,D"))] + "TARGET_ARGONAUT_SET && TARGET_DPFP" + "@ + dsubh%F0%F1.f 0,%H2,%L2 + drsubh%F0%F2.f 0,%H1,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_gt" + [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "cmp.ls pcl,pcl" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_ge" + [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "rcmp.pnz pcl,0" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; DPFP instructions begin... + +;; op0_reg = D1_reg.low +(define_insn "*lr_double_lower" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1l] ; *lr_double_lower" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + +(define_insn "*lr_double_higher" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR_HIGH ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1h] ; *lr_double_higher" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + + +(define_insn "*dexcl_3op_peep2_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") ; not register_operand, to accept SUBREG + (unspec_volatile:SI [ + (match_operand:DF 1 "arc_double_register_operand" "D") + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 3 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL )) + ] + "TARGET_DPFP" + "dexcl%F1 %0, %2, %3" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; version which will not overwrite operand0 +(define_insn "*dexcl_3op_peep2_insn_nores" + [ (unspec_volatile:SI [ + (match_operand:DF 0 "arc_double_register_operand" "D") + (match_operand:SI 1 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL_NORES ) + ] + "TARGET_DPFP" + "dexcl%F0 0, %1, %2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; dexcl a,b,c pattern generated by the peephole2 above +(define_insn "*dexcl_3op_peep2_insn_lr" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "=D")] VUNSPEC_LR )) + (set (match_dup 1) (match_operand:DF 2 "register_operand" "r"))] + ) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + "dexcl%F1 %0, %H2, %L2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; doubles support for ARC +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; D0 = D1+{reg_pair}2 +;; (define_expand "adddf3" +;; [(set (match_operand:DF 0 "arc_double_register_operand" "") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") +;; (match_operand:DF 2 "nonmemory_operand" 
"")))] +;; "TARGET_DPFP" +;; " " +;; ) +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo +(define_expand "adddf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" ""))) + ] + "TARGET_DPFP" + " if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + DONE; + " +) + +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0 */ +;; +(define_insn "adddf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 0,%H2,%L2 + daddh%F0%F1 0,%3,%L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")]) + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo +(define_expand "muldf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + ") + + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +(define_insn "muldf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,!r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 0,%H2,%L2 + dmulh%F0%F1 0,%3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")]) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo +(define_expand "subdf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[1]) == CONST_DOUBLE || GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + int const_index = ((GET_CODE (operands[1]) == CONST_DOUBLE) ? 
1: 2); + split_double (operands[const_index], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + " +) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1 */ +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo /* operand 4 = 1 */ +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo /* operand 4 = 0*/ +(define_insn "subdf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,!r,G") + (match_operand:DF 2 "nonmemory_operand" "!r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2))] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 0,%H2,%L2 + dsubh%F0%F1 0,%3,%L2 + drsubh%F0%F2 0,%H1,%L1 + drsubh%F0%F2 0,%3,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")]) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole for following conversion +;; ;; D0 = D2{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; D0 = {reg_pair}6 +;; ;; | +;; ;; V +;; ;; _________________________________________________________ +;; ;; / D0 = D2 {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2{regpair3_or_limmreg34} ).hi +;; ;; | \_________________________________________________________ +;; ;; | +;; ;; | ________________________________________________________ +;; ;; | / {reg_pair}5.lo = ( D2{regpair3_or_limmreg34} ).lo +;; ;; +-----+ D0 = {reg_pair}6 +;; ;; \ _________________________________________________________ +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d{0}{2}h {reg_pair}5.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; dexcl{0} {reg_pair}5.lo, {reg_pair}6.lo, {reg_pair}6.hi +;; ;; ----------------------------------------------------------------------------------------- +;; ;; where is one of {+,*,-} +;; ;; is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; (match_dup 0)) +;; (set (match_dup 0) +;; (match_operand:DF 6 "register_operand" "")) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; (parallel [ +;; ;; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 7) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; (set (match_dup 0) (match_dup 6))] +;; ) +;; ] +;; "operands[7] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole for following conversion +;; D0 = D2{reg_pair}3 +;; {reg_pair}6 = D0 +;; D0 = {reg_pair}7 +;; | +;; V +;; _________________________________________________________ +;; / D0 = D2 {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2{regpair3_or_limmreg34} ).hi +;; | \_________________________________________________________ +;; | +;; | ________________________________________________________ +;; | / {reg_pair}6.lo = ( D2{regpair3_or_limmreg34} ).lo +;; +-----+ D0 = {reg_pair}7 +;; \ _________________________________________________________ +;; || +;; || +;; \/ +;; d{0}{2}h {reg_pair}6.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; dexcl{0} {reg_pair}6.lo, {reg_pair}7.lo, {reg_pair}7.hi +;; ----------------------------------------------------------------------------------------- +;; where is one of {+,*,-} +;; is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + (set (match_dup 0) + (match_operand:DF 8 "register_operand" "")) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) + (parallel [ +;; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 9) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + (set (match_dup 0) (match_dup 8))] + ) + ] + "operands[9] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole to generate d{ij}h a,b,c instructions +;; ;; D0 = D2{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; | +;; ;; V +;; ;; __________________________________________ +;; ;; / D0 = D2 {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2{regpair3_or_limmreg34} ).hi +;; ;; | \__________________________________________ +;; ;; | +;; ;; + --- {reg_pair}5.lo = ( D2{regpair3_or_limmreg34} ).lo +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; lr {reg_pair}4.lo, {D2l} +;; ;; ---------------------------------------------------------------------------------------- +;; ;; where is one of {+,*,-} +;; ;; is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; (match_dup 0)) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) 
+;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; ; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 6) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; ] +;; "operands[6] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole to generate d{ij}h a,b,c instructions +;; D0 = D2{reg_pair}3 +;; {reg_pair}6 = D0 +;; | +;; V +;; __________________________________________ +;; / D0 = D2 {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2{regpair3_or_limmreg34} ).hi +;; | \__________________________________________ +;; | +;; + --- {reg_pair}6.lo = ( D2{regpair3_or_limmreg34} ).lo +;; || +;; || +;; \/ +;; d{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; lr {reg_pair}4.lo, {D2l} +;; ---------------------------------------------------------------------------------------- +;; where is one of {+,*,-} +;; is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) +; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 8) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + ] + "operands[8] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; _______________________________________________________ +;; ;; / D0 = D1 + {regpair2_or_limmreg23} +;; ;; + {reg_pair}4.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; ;; \_______________________________________________________ +;; (define_insn "*daddh_peep2_insn" +;; [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") +;; (match_operand:DF 2 "nonmemory_operand" "r,G"))) +;; (use (match_operand:SI 3 "" "N,r")) +;; (set (match_operand:DF 4 "register_operand" "=r,r") +;; (plus:DF (match_dup 1) +;; (match_dup 2)))])] +;; "TARGET_DPFP" +;; "@ +;; daddh%F0%F1 %H4, %H2, %L2 +;; daddh%F0%F1 %H4, %3, %L2" +;; [(set_attr "type" "dpfp_addsub") +;; (set_attr "length" "4,8")] +;; ) +;; _______________________________________________________ +;; / D0 = D1 + {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*daddh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (plus:DF 
(match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 %H6, %H2, %L2 + daddh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 * {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 * {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*dmulh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (mult:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 %H6, %H2, %L2 + dmulh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 - {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 - {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +;; OR +;; _______________________________________________________ +;; / D0 = {regpair1_or_limmreg13} - D2 +;; + {reg_pair}5.hi = ( {regpair1_or_limmreg13} ).hi - D2 +;; \_______________________________________________________ +(define_insn "*dsubh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,r,G") + (match_operand:DF 2 "nonmemory_operand" "r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r,r,r") + (minus:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 %H6, %H2, %L2 + dsubh%F0%F1 %H6, %3, %L2 + drsubh%F0%F2 %H6, %H1, %L1 + drsubh%F0%F2 %H6, %3, %L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")] +) diff --git a/gcc-4.9/gcc/config/arc/predicates.md b/gcc-4.9/gcc/config/arc/predicates.md new file mode 100644 index 000000000..81bf6277e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/predicates.md @@ -0,0 +1,811 @@ +;; Predicate definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
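+
+;; The two predicates below validate instruction destinations.  They defer
+;; to register_operand, but reject hard registers that belong to
+;; ALL_CORE_REGS without also being in WRITABLE_CORE_REGS (respectively
+;; MPY_WRITABLE_CORE_REGS, which in addition excludes LP_COUNT as a
+;; multiply destination).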
+ +(define_predicate "dest_reg_operand" + (match_code "reg,subreg") +{ + rtx op0 = op; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS], + REGNO (op0)) + && !TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], + REGNO (op0))) + return 0; + return register_operand (op, mode); +}) + +(define_predicate "mpy_dest_reg_operand" + (match_code "reg,subreg") +{ + rtx op0 = op; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS], + REGNO (op0)) + /* Make sure the destination register is not LP_COUNT. */ + && !TEST_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], + REGNO (op0))) + return 0; + return register_operand (op, mode); +}) + + +;; Returns 1 if OP is a symbol reference. +(define_predicate "symbolic_operand" + (match_code "symbol_ref, label_ref, const") +) + +;; Acceptable arguments to the call insn. +(define_predicate "call_address_operand" + (ior (match_code "const_int, reg") + (match_operand 0 "symbolic_operand") + (match_test "CONSTANT_P (op) + && arc_legitimate_constant_p (VOIDmode, op)")) +) + +(define_predicate "call_operand" + (and (match_code "mem") + (match_test "call_address_operand (XEXP (op, 0), mode)")) +) + +;; Return true if OP is a unsigned 6-bit immediate (u6) value. +(define_predicate "u6_immediate_operand" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (INTVAL (op))")) +) + +;; Return true if OP is a short immediate (shimm) value. +(define_predicate "short_immediate_operand" + (and (match_code "const_int") + (match_test "SMALL_INT (INTVAL (op))")) +) + +(define_predicate "p2_immediate_operand" + (and (match_code "const_int") + (match_test "((INTVAL (op) - 1) & INTVAL (op)) == 0") + (match_test "INTVAL (op)")) +) + +;; Return true if OP will require a long immediate (limm) value. +;; This is currently only used when calculating length attributes. +(define_predicate "long_immediate_operand" + (match_code "symbol_ref, label_ref, const, const_double, const_int") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return !SIGNED_INT12 (INTVAL (op)); + case CONST_DOUBLE : + /* These can happen because large unsigned 32 bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + default: + break; + } + return 0; +} +) + +;; Return true if OP is a MEM that when used as a load or store address will +;; require an 8 byte insn. +;; Load and store instructions don't allow the same possibilities but they're +;; similar enough that this one function will do. +;; This is currently only used when calculating length attributes. */ +(define_predicate "long_immediate_loadstore_operand" + (match_code "mem") +{ + int size = GET_MODE_SIZE (GET_MODE (op)); + + op = XEXP (op, 0); + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + /* This must be handled as "st c,[limm]". Ditto for load. + Technically, the assembler could translate some possibilities to + "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't + assume that it does. 
*/ + return 1; + case CONST_DOUBLE : + /* These can happen because large unsigned 32 bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + case REG : + return 0; + case PLUS : + { + rtx x = XEXP (op, 1); + + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + } + if (CONST_INT_P (x)) + return (!SMALL_INT (INTVAL (x)) + && (size <= 1 || size > 4 + || (INTVAL (x) & (size - 1)) != 0 + || !SMALL_INT (INTVAL (x) / size))); + else if (GET_CODE (x) == SYMBOL_REF) + return TARGET_NO_SDATA_SET || !SYMBOL_REF_SMALL_P (x); + return 0; + } + default: + break; + } + return 0; +} +) + +;; Return true if OP is any of R0-R3,R12-R15 for ARCompact 16-bit +;; instructions +(define_predicate "compact_register_operand" + (match_code "reg, subreg") + { + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (op))) ; + } +) + +;; Return true if OP is an acceptable memory operand for ARCompact +;; 16-bit load instructions. +(define_predicate "compact_load_memory_operand" + (match_code "mem") +{ + rtx addr, plus0, plus1; + int size, off; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return 0; + + /* .di instructions have no 16-bit form. */ + if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET) + return 0; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return 0; + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return (REGNO (addr) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (addr)) + || (SP_REG_P (REGNO (addr)) && (size != 2))); + /* Reverting for the moment since ldw_s does not have sp as a valid + parameter. */ + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && ((GET_CODE (plus1) == REG) + && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus1))))) + { + return 1; + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + /* Negative offset is not supported in 16-bit load/store insns. */ + if (off < 0) + return 0; + + switch (size) + { + case 1: + return (off < 32); + case 2: + return ((off < 64) && (off % 2 == 0)); + case 4: + return ((off < 128) && (off % 4 == 0)); + } + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || SP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0)); + } + default: + break ; + /* TODO: 'gp' and 'pcl' are to supported as base address operand + for 16-bit load instructions. */ + } + return 0; + +} +) + +;; Return true if OP is an acceptable memory operand for ARCompact +;; 16-bit store instructions +(define_predicate "compact_store_memory_operand" + (match_code "mem") +{ + rtx addr, plus0, plus1; + int size, off; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + /* .di instructions have no 16-bit form. 
*/ + if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET) + return 0; + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return 0; + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return (REGNO (addr) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (addr)) + || (SP_REG_P (REGNO (addr)) && (size != 2))); + /* stw_s does not support SP as a parameter. */ + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + /* Negative offset is not supported in 16-bit load/store insns. */ + if (off < 0) + return 0; + + switch (size) + { + case 1: + return (off < 32); + case 2: + return ((off < 64) && (off % 2 == 0)); + case 4: + return ((off < 128) && (off % 4 == 0)); + } + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || SP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0)); + } + default: + break; + } + return 0; + } +) + +;; Return true if OP is an acceptable argument for a single word +;; move source. +(define_predicate "move_src_operand" + (match_code "symbol_ref, label_ref, const, const_int, const_double, reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return (!flag_pic || arc_legitimate_pic_operand_p(op)); + case CONST_INT : + return (LARGE_INT (INTVAL (op))); + case CONST_DOUBLE : + /* We can handle DImode integer constants in SImode if the value + (signed or unsigned) will fit in 32 bits. This is needed because + large unsigned 32 bit constants are represented as CONST_DOUBLEs. */ + if (mode == SImode) + return arc_double_limm_p (op); + /* We can handle 32 bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a double word +;; move source. +(define_predicate "move_double_src_operand" + (match_code "reg, subreg, mem, const_int, const_double") +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return move_double_src_operand (SUBREG_REG (op), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + case CONST_INT : + case CONST_DOUBLE : + return 1; + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a move destination. +(define_predicate "move_dest_operand" + (match_code "reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case REG : + /* Program Counter register cannot be the target of a move. It is + a readonly register. 
*/ + if (REGNO (op) == PROGRAM_COUNTER_REGNO) + return 0; + else if (TARGET_MULMAC_32BY16_SET + && (REGNO (op) == 56 || REGNO(op) == 57)) + return 0; + else if (TARGET_MUL64_SET + && (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 )) + return 0; + else + return dest_reg_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return dest_reg_operand (op, mode); + case MEM : + { + rtx addr = XEXP (op, 0); + + if (GET_CODE (addr) == PLUS + && (GET_CODE (XEXP (addr, 0)) == MULT + || (!CONST_INT_P (XEXP (addr, 1)) + && (TARGET_NO_SDATA_SET + || GET_CODE (XEXP (addr, 1)) != SYMBOL_REF + || !SYMBOL_REF_SMALL_P (XEXP (addr, 1)))))) + return 0; + if ((GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) + && (GET_CODE (XEXP (addr, 1)) != PLUS + || !CONST_INT_P (XEXP (XEXP (addr, 1), 1)))) + return 0; + return address_operand (addr, mode); + } + default : + return 0; + } + +} +) + +;; Return true if OP is valid load with update operand. +(define_predicate "load_update_operand" + (match_code "mem") +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !nonmemory_operand (XEXP (op, 1), Pmode)) + return 0; + return 1; + +} +) + +;; Return true if OP is valid store with update operand. +(define_predicate "store_update_operand" + (match_code "mem") +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !(GET_CODE (XEXP (op, 1)) == CONST_INT + && SMALL_INT (INTVAL (XEXP (op, 1))))) + return 0; + return 1; +} +) + +;; Return true if OP is a non-volatile non-immediate operand. +;; Volatile memory refs require a special "cache-bypass" instruction +;; and only the standard movXX patterns are set up to handle them. +(define_predicate "nonvol_nonimm_operand" + (and (match_code "subreg, reg, mem") + (match_test "(GET_CODE (op) != MEM || !MEM_VOLATILE_P (op)) && nonimmediate_operand (op, mode)")) +) + +;; Return 1 if OP is a comparison operator valid for the mode of CC. +;; This allows the use of MATCH_OPERATOR to recognize all the branch insns. + +(define_predicate "proper_comparison_operator" + (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt") +{ + enum rtx_code code = GET_CODE (op); + + if (!COMPARISON_P (op)) + return 0; + + /* After generic flag-setting insns, we can use eq / ne / pl / mi / pnz . + There are some creative uses for hi / ls after shifts, but these are + hard to understand for the compiler and could be at best the target of + a peephole. 
*/ + switch (GET_MODE (XEXP (op, 0))) + { + case CC_ZNmode: + return (code == EQ || code == NE || code == GE || code == LT + || code == GT); + case CC_Zmode: + return code == EQ || code == NE; + case CC_Cmode: + return code == LTU || code == GEU; + case CC_FP_GTmode: + return code == GT || code == UNLE; + case CC_FP_GEmode: + return code == GE || code == UNLT; + case CC_FP_ORDmode: + return code == ORDERED || code == UNORDERED; + case CC_FP_UNEQmode: + return code == UNEQ || code == LTGT; + case CC_FPXmode: + return (code == EQ || code == NE || code == UNEQ || code == LTGT + || code == ORDERED || code == UNORDERED); + + case CCmode: + case SImode: /* Used for BRcc. */ + return 1; + /* From combiner. */ + case QImode: case HImode: case DImode: case SFmode: case DFmode: + return 0; + default: + gcc_unreachable (); + } +}) + +(define_predicate "equality_comparison_operator" + (match_code "eq, ne")) + +(define_predicate "brcc_nolimm_operator" + (ior (match_test "REG_P (XEXP (op, 1))") + (and (match_code "eq, ne, lt, ge, ltu, geu") + (match_test "u6_immediate_operand (XEXP (op, 1), SImode)")) + (and (match_code "le, gt, leu, gtu") + (match_test "UNSIGNED_INT6 (INTVAL (XEXP (op, 1)) + 1)")))) + +;; Return TRUE if this is the condition code register, if we aren't given +;; a mode, accept any CCmode register +(define_special_predicate "cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + if (GET_MODE_CLASS (mode) != MODE_CC) + return FALSE; + } + + if (mode == GET_MODE (op) && GET_CODE (op) == REG && REGNO (op) == CC_REG) + return TRUE; + + return FALSE; +}) + +;; Return TRUE if this is the condition code register; if we aren't given +;; a mode, accept any CCmode register. If we are given a mode, accept +;; modes that set a subset of flags. +(define_special_predicate "cc_set_register" + (match_code "reg") +{ + enum machine_mode rmode = GET_MODE (op); + + if (mode == VOIDmode) + { + mode = rmode; + if (GET_MODE_CLASS (mode) != MODE_CC) + return FALSE; + } + + if (REGNO (op) != 61) + return FALSE; + if (mode == rmode + || (mode == CC_ZNmode && rmode == CC_Zmode) + || (mode == CCmode && rmode == CC_Zmode) + || (mode == CCmode && rmode == CC_ZNmode) + || (mode == CCmode && rmode == CC_Cmode)) + return TRUE; + + return FALSE; +}) + +; Accept CC_REG in modes which provide the flags needed for MODE. */ +(define_special_predicate "cc_use_register" + (match_code "reg") +{ + if (REGNO (op) != CC_REG) + return 0; + if (GET_MODE (op) == mode) + return 1; + switch (mode) + { + case CC_Zmode: + if (GET_MODE (op) == CC_ZNmode) + return 1; + /* Fall through. */ + case CC_ZNmode: case CC_Cmode: + return GET_MODE (op) == CCmode; + default: + gcc_unreachable (); + } +}) + +(define_special_predicate "zn_compare_operator" + (match_code "compare") +{ + return GET_MODE (op) == CC_ZNmode || GET_MODE (op) == CC_Zmode; +}) + +;; Return true if OP is a shift operator. +(define_predicate "shift_operator" + (match_code "ashiftrt, lshiftrt, ashift") +) + +;; Return true if OP is a left shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. 
+(define_predicate "shiftl4_operator" + (and (match_code "ashift") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 9U + || INTVAL (XEXP (op, 1)) == 29 + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a right shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shiftr4_operator" + (and (match_code "ashiftrt, lshiftrt") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 4U + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shift4_operator" + (ior (match_operand 0 "shiftl4_operator") + (match_operand 0 "shiftr4_operator"))) + +(define_predicate "mult_operator" + (and (match_code "mult") (match_test "TARGET_ARC700 && !TARGET_NOMPY_SET")) +) + +(define_predicate "commutative_operator" + (ior (match_code "plus,ior,xor,and") + (match_operand 0 "mult_operator") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "commutative_operator_sans_mult" + (ior (match_code "plus,ior,xor,and") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "noncommutative_operator" + (ior (match_code "minus,ashift,ashiftrt,lshiftrt,rotatert") + (and (match_code "ss_minus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "unary_operator" + (ior (match_code "abs,neg,not,sign_extend,zero_extend") + (and (ior (match_code "ss_neg") + (and (match_code "ss_truncate") + (match_test "GET_MODE (XEXP (op, 0)) == HImode"))) + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "_2_4_8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8")) +) + +(define_predicate "arc_double_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == DOUBLE_REGS)); +}) + +(define_predicate "shouldbe_register_operand" + (match_code "reg,subreg,mem") +{ + return ((reload_in_progress || reload_completed) + ? 
general_operand : register_operand) (op, mode); +}) + +(define_predicate "vector_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "vector_register_or_memory_operand" + ( ior (match_code "reg") + (match_code "mem")) +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + if ((GET_CODE (op) == MEM) + && (mode == V8HImode) + && GET_CODE (XEXP (op,0)) == REG) + return 1; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "arc_dpfp_operator" + (match_code "plus, mult,minus") +) + +(define_predicate "arc_simd_dma_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_DMA_CONFIG_REGS)); +}) + +(define_predicate "acc1_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 56 : 57)"))) + +(define_predicate "acc2_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 57 : 56)"))) + +(define_predicate "mlo_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 59 : 58)"))) + +(define_predicate "mhi_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 58 : 59)"))) + +; Unfortunately, we can not allow a const_int_operand before reload, because +; reload needs a non-void mode to guide it how to reload the inside of a +; {sign_}extend. +(define_predicate "extend_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "immediate_operand") + (ior (not (match_operand 0 "const_int_operand")) + (match_test "reload_in_progress || reload_completed"))))) + +(define_predicate "millicode_store_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 0); +}) + +(define_predicate "millicode_load_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 2, 2); +}) + +(define_predicate "millicode_load_clob_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 1); +}) + +(define_special_predicate "immediate_usidi_operand" + (if_then_else + (match_code "const_int") + (match_test "INTVAL (op) >= 0") + (and (match_test "const_double_operand (op, mode)") + (match_test "CONST_DOUBLE_HIGH (op) == 0")))) diff --git a/gcc-4.9/gcc/config/arc/simdext.md b/gcc-4.9/gcc/config/arc/simdext.md new file mode 100644 index 000000000..13e268c11 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/simdext.md @@ -0,0 +1,1292 @@ +;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_constants + [ + ;; Va, Vb, Vc builtins + (UNSPEC_ARC_SIMD_VADDAW 1000) + (UNSPEC_ARC_SIMD_VADDW 1001) + (UNSPEC_ARC_SIMD_VAVB 1002) + (UNSPEC_ARC_SIMD_VAVRB 1003) + (UNSPEC_ARC_SIMD_VDIFAW 1004) + (UNSPEC_ARC_SIMD_VDIFW 1005) + (UNSPEC_ARC_SIMD_VMAXAW 1006) + (UNSPEC_ARC_SIMD_VMAXW 1007) + (UNSPEC_ARC_SIMD_VMINAW 1008) + (UNSPEC_ARC_SIMD_VMINW 1009) + (UNSPEC_ARC_SIMD_VMULAW 1010) + (UNSPEC_ARC_SIMD_VMULFAW 1011) + (UNSPEC_ARC_SIMD_VMULFW 1012) + (UNSPEC_ARC_SIMD_VMULW 1013) + (UNSPEC_ARC_SIMD_VSUBAW 1014) + (UNSPEC_ARC_SIMD_VSUBW 1015) + (UNSPEC_ARC_SIMD_VSUMMW 1016) + (UNSPEC_ARC_SIMD_VAND 1017) + (UNSPEC_ARC_SIMD_VANDAW 1018) + (UNSPEC_ARC_SIMD_VBIC 1019) + (UNSPEC_ARC_SIMD_VBICAW 1020) + (UNSPEC_ARC_SIMD_VOR 1021) + (UNSPEC_ARC_SIMD_VXOR 1022) + (UNSPEC_ARC_SIMD_VXORAW 1023) + (UNSPEC_ARC_SIMD_VEQW 1024) + (UNSPEC_ARC_SIMD_VLEW 1025) + (UNSPEC_ARC_SIMD_VLTW 1026) + (UNSPEC_ARC_SIMD_VNEW 1027) + (UNSPEC_ARC_SIMD_VMR1AW 1028) + (UNSPEC_ARC_SIMD_VMR1W 1029) + (UNSPEC_ARC_SIMD_VMR2AW 1030) + (UNSPEC_ARC_SIMD_VMR2W 1031) + (UNSPEC_ARC_SIMD_VMR3AW 1032) + (UNSPEC_ARC_SIMD_VMR3W 1033) + (UNSPEC_ARC_SIMD_VMR4AW 1034) + (UNSPEC_ARC_SIMD_VMR4W 1035) + (UNSPEC_ARC_SIMD_VMR5AW 1036) + (UNSPEC_ARC_SIMD_VMR5W 1037) + (UNSPEC_ARC_SIMD_VMR6AW 1038) + (UNSPEC_ARC_SIMD_VMR6W 1039) + (UNSPEC_ARC_SIMD_VMR7AW 1040) + (UNSPEC_ARC_SIMD_VMR7W 1041) + (UNSPEC_ARC_SIMD_VMRB 1042) + (UNSPEC_ARC_SIMD_VH264F 1043) + (UNSPEC_ARC_SIMD_VH264FT 1044) + (UNSPEC_ARC_SIMD_VH264FW 1045) + (UNSPEC_ARC_SIMD_VVC1F 1046) + (UNSPEC_ARC_SIMD_VVC1FT 1047) + ;; Va, Vb, rc/limm builtins + (UNSPEC_ARC_SIMD_VBADDW 1050) + (UNSPEC_ARC_SIMD_VBMAXW 1051) + (UNSPEC_ARC_SIMD_VBMINW 1052) + (UNSPEC_ARC_SIMD_VBMULAW 1053) + (UNSPEC_ARC_SIMD_VBMULFW 1054) + (UNSPEC_ARC_SIMD_VBMULW 1055) + (UNSPEC_ARC_SIMD_VBRSUBW 1056) + (UNSPEC_ARC_SIMD_VBSUBW 1057) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRW 1060) + (UNSPEC_ARC_SIMD_VSR8 1061) + (UNSPEC_ARC_SIMD_VSR8AW 1062) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRRWi 1065) + (UNSPEC_ARC_SIMD_VASRSRWi 1066) + (UNSPEC_ARC_SIMD_VASRWi 1067) + (UNSPEC_ARC_SIMD_VASRPWBi 1068) + (UNSPEC_ARC_SIMD_VASRRPWBi 1069) + (UNSPEC_ARC_SIMD_VSR8AWi 1070) + (UNSPEC_ARC_SIMD_VSR8i 1071) + + ;; Va, Vb, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMVAW 1075) + (UNSPEC_ARC_SIMD_VMVW 1076) + (UNSPEC_ARC_SIMD_VMVZW 1077) + (UNSPEC_ARC_SIMD_VD6TAPF 1078) + + ;; Va, rlimm, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMOVAW 1080) + (UNSPEC_ARC_SIMD_VMOVW 1081) + (UNSPEC_ARC_SIMD_VMOVZW 1082) + + ;; Va, Vb builtins + (UNSPEC_ARC_SIMD_VABSAW 1085) + (UNSPEC_ARC_SIMD_VABSW 1086) + (UNSPEC_ARC_SIMD_VADDSUW 1087) + (UNSPEC_ARC_SIMD_VSIGNW 1088) + (UNSPEC_ARC_SIMD_VEXCH1 1089) + (UNSPEC_ARC_SIMD_VEXCH2 1090) + (UNSPEC_ARC_SIMD_VEXCH4 1091) + (UNSPEC_ARC_SIMD_VUPBAW 1092) + (UNSPEC_ARC_SIMD_VUPBW 1093) + (UNSPEC_ARC_SIMD_VUPSBAW 1094) + (UNSPEC_ARC_SIMD_VUPSBW 1095) + + (UNSPEC_ARC_SIMD_VDIRUN 1100) + (UNSPEC_ARC_SIMD_VDORUN 1101) + (UNSPEC_ARC_SIMD_VDIWR 1102) + (UNSPEC_ARC_SIMD_VDOWR 1103) + + (UNSPEC_ARC_SIMD_VREC 1105) + (UNSPEC_ARC_SIMD_VRUN 1106) + (UNSPEC_ARC_SIMD_VRECRUN 1107) + (UNSPEC_ARC_SIMD_VENDREC 1108) + + (UNSPEC_ARC_SIMD_VCAST 1200) + (UNSPEC_ARC_SIMD_VINTI 1201) + ] +) + +;; Scheduler descriptions for the simd instructions +(define_insn_reservation "simd_lat_0_insn" 1 + (eq_attr "type" "simd_dma, simd_vstore, simd_vcontrol") + "issue+simd_unit") + +(define_insn_reservation 
"simd_lat_1_insn" 2 + (eq_attr "type" "simd_vcompare, simd_vlogic, + simd_vmove_else_zero, simd_varith_1cycle") + "issue+simd_unit, nothing") + +(define_insn_reservation "simd_lat_2_insn" 3 + (eq_attr "type" "simd_valign, simd_vpermute, + simd_vpack, simd_varith_2cycle") + "issue+simd_unit, nothing*2") + +(define_insn_reservation "simd_lat_3_insn" 4 + (eq_attr "type" "simd_valign_with_acc, simd_vpack_with_acc, + simd_vlogic_with_acc, simd_vload128, + simd_vmove_with_acc, simd_vspecial_3cycle, + simd_varith_with_acc") + "issue+simd_unit, nothing*3") + +(define_insn_reservation "simd_lat_4_insn" 5 + (eq_attr "type" "simd_vload, simd_vmove, simd_vspecial_4cycle") + "issue+simd_unit, nothing*4") + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "general_operand" "") + (match_operand:V8HI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM) + operands[1] = force_reg (V8HImode, operands[1]); +}") + +;; This pattern should appear before the movv8hi_insn pattern +(define_insn "vld128_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P"))))] + "TARGET_SIMD_SET" + "vld128 %0, [i%2, %3]" + [(set_attr "type" "simd_vload128") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst128_insn" + [(set (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (match_operand:V8HI 3 "vector_register_operand" "=v"))] + "TARGET_SIMD_SET" + "vst128 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst64_insn" + [(set (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (vec_select:V4HI (match_operand:V8HI 3 "vector_register_operand" "=v") + (parallel [(const_int 0)])))] + "TARGET_SIMD_SET" + "vst64 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "movv8hi_insn" + [(set (match_operand:V8HI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:V8HI 1 "vector_register_or_memory_operand" "m,v,v"))] + "TARGET_SIMD_SET && !(GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM)" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +(define_insn "movti_insn" + [(set (match_operand:TI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:TI 1 "vector_register_or_memory_operand" "m,v,v"))] + "" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +;; (define_insn "*movv8hi_insn_rr" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov reg,reg" +;; 
[(set_attr "length" "8") +;; (set_attr "type" "move")]) + +;; (define_insn "*movv8_out" +;; [(set (match_operand:V8HI 0 "memory_operand" "=m") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov out" +;; [(set_attr "length" "8") +;; (set_attr "type" "move")]) + + +;; (define_insn "addv8hi3" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (plus:V8HI (match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; (define_insn "vaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; V V V Insns +(define_insn "vaddaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDAW))] + "TARGET_SIMD_SET" + "vaddaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] + "TARGET_SIMD_SET" + "vaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVB))] + "TARGET_SIMD_SET" + "vavb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVRB))] + "TARGET_SIMD_SET" + "vavrb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFAW))] + "TARGET_SIMD_SET" + "vdifaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFW))] + "TARGET_SIMD_SET" + "vdifw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXAW))] + "TARGET_SIMD_SET" + "vmaxaw %0, 
%1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXW))] + "TARGET_SIMD_SET" + "vmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINAW))] + "TARGET_SIMD_SET" + "vminaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINW))] + "TARGET_SIMD_SET" + "vminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULAW))] + "TARGET_SIMD_SET" + "vmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFAW))] + "TARGET_SIMD_SET" + "vmulfaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFW))] + "TARGET_SIMD_SET" + "vmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULW))] + "TARGET_SIMD_SET" + "vmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBAW))] + "TARGET_SIMD_SET" + "vsubaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBW))] + "TARGET_SIMD_SET" + "vsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" 
"nocond")]) + +(define_insn "vsummw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUMMW))] + "TARGET_SIMD_SET" + "vsummw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vand_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAND))] + "TARGET_SIMD_SET" + "vand %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vandaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VANDAW))] + "TARGET_SIMD_SET" + "vandaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbic_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBIC))] + "TARGET_SIMD_SET" + "vbic %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbicaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBICAW))] + "TARGET_SIMD_SET" + "vbicaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VOR))] + "TARGET_SIMD_SET" + "vor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXOR))] + "TARGET_SIMD_SET" + "vxor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxoraw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXORAW))] + "TARGET_SIMD_SET" + "vxoraw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "veqw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEQW))] + "TARGET_SIMD_SET" + "veqw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vlew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 
"vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLEW))] + "TARGET_SIMD_SET" + "vlew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vltw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLTW))] + "TARGET_SIMD_SET" + "vltw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vnew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VNEW))] + "TARGET_SIMD_SET" + "vnew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1AW))] + "TARGET_SIMD_SET" + "vmr1aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1W))] + "TARGET_SIMD_SET" + "vmr1w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2AW))] + "TARGET_SIMD_SET" + "vmr2aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2W))] + "TARGET_SIMD_SET" + "vmr2w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3AW))] + "TARGET_SIMD_SET" + "vmr3aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3W))] + "TARGET_SIMD_SET" + "vmr3w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4AW))] + "TARGET_SIMD_SET" + "vmr4aw %0, %1, 
%2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4W))] + "TARGET_SIMD_SET" + "vmr4w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5AW))] + "TARGET_SIMD_SET" + "vmr5aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5W))] + "TARGET_SIMD_SET" + "vmr5w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6AW))] + "TARGET_SIMD_SET" + "vmr6aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6W))] + "TARGET_SIMD_SET" + "vmr6w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7AW))] + "TARGET_SIMD_SET" + "vmr7aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7W))] + "TARGET_SIMD_SET" + "vmr7w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMRB))] + "TARGET_SIMD_SET" + "vmrb %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264F))] + "TARGET_SIMD_SET" + "vh264f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264ft_insn" + [(set 
(match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FT))] + "TARGET_SIMD_SET" + "vh264ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264fw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FW))] + "TARGET_SIMD_SET" + "vh264fw %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1F))] + "TARGET_SIMD_SET" + "vvc1f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1ft_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1FT))] + "TARGET_SIMD_SET" + "vvc1ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + + + +;;--- +;; V V r/limm Insns + +;; (define_insn "vbaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:SI 2 "nonmemory_operand" "rCal")] UNSPEC_ARC_SIMD_VBADDW))] +;; "TARGET_SIMD_SET" +;; "vbaddw %0, %1, %2" +;; [(set_attr "length" "4") +;; (set_attr "cond" "nocond")]) + +(define_insn "vbaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBADDW))] + "TARGET_SIMD_SET" + "vbaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMAXW))] + "TARGET_SIMD_SET" + "vbmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMINW))] + "TARGET_SIMD_SET" + "vbminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULAW))] + "TARGET_SIMD_SET" + "vbmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 
"vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULFW))] + "TARGET_SIMD_SET" + "vbmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULW))] + "TARGET_SIMD_SET" + "vbmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbrsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBRSUBW))] + "TARGET_SIMD_SET" + "vbrsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBSUBW))] + "TARGET_SIMD_SET" + "vbsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) +; Va, Vb, Ic instructions + +; Va, Vb, u6 instructions +(define_insn "vasrrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRWi))] + "TARGET_SIMD_SET" + "vasrrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrsrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRSRWi))] + "TARGET_SIMD_SET" + "vasrsrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRWi))] + "TARGET_SIMD_SET" + "vasrwi %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRPWBi))] + "TARGET_SIMD_SET" + "vasrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRPWBi))] + "TARGET_SIMD_SET" + "vasrrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8awi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] 
UNSPEC_ARC_SIMD_VSR8AWi))] + "TARGET_SIMD_SET" + "vsr8awi %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8i_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8i))] + "TARGET_SIMD_SET" + "vsr8i %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb, u8 (simm) insns + +(define_insn "vmvaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVAW))] + "TARGET_SIMD_SET" + "vmvaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVW))] + "TARGET_SIMD_SET" + "vmvw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVZW))] + "TARGET_SIMD_SET" + "vmvzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vd6tapf_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VD6TAPF))] + "TARGET_SIMD_SET" + "vd6tapf %0, %1, %2" + [(set_attr "type" "simd_vspecial_4cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, u8 (simm) insns +(define_insn "vmovaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVAW))] + "TARGET_SIMD_SET" + "vmovaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVW))] + "TARGET_SIMD_SET" + "vmovw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVZW))] + "TARGET_SIMD_SET" + "vmovzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, Ic insns +(define_insn "vsr8_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8))] + "TARGET_SIMD_SET" + "vsr8 %0, %1, i%2" + [(set_attr 
"type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VASRW))] + "TARGET_SIMD_SET" + "vasrw %0, %1, i%2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8AW))] + "TARGET_SIMD_SET" + "vsr8aw %0, %1, i%2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb insns +(define_insn "vabsaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSAW))] + "TARGET_SIMD_SET" + "vabsaw %0, %1" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vabsw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSW))] + "TARGET_SIMD_SET" + "vabsw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddsuw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDSUW))] + "TARGET_SIMD_SET" + "vaddsuw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsignw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSIGNW))] + "TARGET_SIMD_SET" + "vsignw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch1_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH1))] + "TARGET_SIMD_SET" + "vexch1 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch2_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH2))] + "TARGET_SIMD_SET" + "vexch2 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch4_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH4))] + "TARGET_SIMD_SET" + "vexch4 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBAW))] + "TARGET_SIMD_SET" + "vupbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + 
+(define_insn "vupbw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBW))] + "TARGET_SIMD_SET" + "vupbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBAW))] + "TARGET_SIMD_SET" + "vupsbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBW))] + "TARGET_SIMD_SET" + "vupsbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +; DMA setup instructions +(define_insn "vdirun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDIRUN))] + "TARGET_SIMD_SET" + "vdirun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdorun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDORUN))] + "TARGET_SIMD_SET" + "vdorun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdiwr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDIWR))] + "TARGET_SIMD_SET" + "vdiwr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +(define_insn "vdowr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDOWR))] + "TARGET_SIMD_SET" + "vdowr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +;; vector record and run instructions +(define_insn "vrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VREC)] + "TARGET_SIMD_SET" + "vrec %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRUN)] + "TARGET_SIMD_SET" + "vrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrecrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRECRUN)] + "TARGET_SIMD_SET" + "vrecrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vendrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VENDREC)] + "TARGET_SIMD_SET" + "vendrec %S0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32wh_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (zero_extend:V4HI (mem:V4QI 
(plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) + (vec_select:V4HI (match_dup 0) + (parallel [(const_int 0)]))))] + "TARGET_SIMD_SET" + "vld32wh %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32wl_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32wl %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld64w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (zero_extend:V8HI (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P")))))] + "TARGET_SIMD_SET" + "vld64w %0, [i%2, %3]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vld64_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V4HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")]))))) ))] + "TARGET_SIMD_SET" + "vld64 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (vec_concat:V4HI (vec_select:V2HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V2HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst16_n_insn" + [(set (mem:HI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:HI (match_operand:V8HI 3 "vector_register_operand" "v") + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst16_%4 %3,[i%2, %0]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst32_n_insn" + [(set (mem:SI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:SI (unspec:V4SI [(match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VCAST) + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst32_%4 %3,[i%2, %0]" + 
[(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; SIMD unit interrupt +(define_insn "vinti_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "L")] UNSPEC_ARC_SIMD_VINTI)] + "TARGET_SIMD_SET" + "vinti %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) diff --git a/gcc-4.9/gcc/config/arc/t-arc-newlib b/gcc-4.9/gcc/config/arc/t-arc-newlib new file mode 100644 index 000000000..5c1cb26b6 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/t-arc-newlib @@ -0,0 +1,38 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with newlib. + +# Copyright (C) 2007-2014 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . + +# Selecting -mA5 uses the same functional multilib files/libraries +# as get used for -mARC600 aka -mA6. +MULTILIB_OPTIONS=mcpu=ARC600/mcpu=ARC601 mmul64/mmul32x16 mnorm +MULTILIB_DIRNAMES=arc600 arc601 mul64 mul32x16 norm +# +# Aliases: +MULTILIB_MATCHES = mcpu?ARC600=mcpu?arc600 +MULTILIB_MATCHES += mcpu?ARC600=mARC600 +MULTILIB_MATCHES += mcpu?ARC600=mA6 +MULTILIB_MATCHES += mcpu?ARC600=mA5 +MULTILIB_MATCHES += mcpu?ARC600=mno-mpy +MULTILIB_MATCHES += mcpu?ARC601=mcpu?arc601 +MULTILIB_MATCHES += EL=mlittle-endian +MULTILIB_MATCHES += EB=mbig-endian +# +# These don't make sense for the ARC700 default target: +MULTILIB_EXCEPTIONS=mmul64* mmul32x16* mnorm* +# And neither of the -mmul* options make sense without -mnorm: +MULTILIB_EXCLUSIONS=mARC600/mmul64/!mnorm mcpu=ARC601/mmul64/!mnorm mARC600/mmul32x16/!mnorm diff --git a/gcc-4.9/gcc/config/arc/t-arc-uClibc b/gcc-4.9/gcc/config/arc/t-arc-uClibc new file mode 100644 index 000000000..704a3aa67 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/t-arc-uClibc @@ -0,0 +1,20 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with uClibc + +# Copyright (C) 2007-2014 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . 
+ +MULTILIB_EXTRA_OPTS = mno-sdata diff --git a/gcc-4.9/gcc/config/arm/README-interworking b/gcc-4.9/gcc/config/arm/README-interworking new file mode 100644 index 000000000..3e36f12a8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/README-interworking @@ -0,0 +1,749 @@ + Arm / Thumb Interworking + ======================== + +The Cygnus GNU Pro Toolkit for the ARM7T processor supports function +calls between code compiled for the ARM instruction set and code +compiled for the Thumb instruction set and vice versa. This document +describes how that interworking support operates and explains the +command line switches that should be used in order to produce working +programs. + +Note: The Cygnus GNU Pro Toolkit does not support switching between +compiling for the ARM instruction set and the Thumb instruction set +on anything other than a per file basis. There are in fact two +completely separate compilers, one that produces ARM assembler +instructions and one that produces Thumb assembler instructions. The +two compilers share the same assembler, linker and so on. + + +1. Explicit interworking support for C and C++ files +==================================================== + +By default if a file is compiled without any special command line +switches then the code produced will not support interworking. +Provided that a program is made up entirely from object files and +libraries produced in this way and which contain either exclusively +ARM instructions or exclusively Thumb instructions then this will not +matter and a working executable will be created. If an attempt is +made to link together mixed ARM and Thumb object files and libraries, +then warning messages will be produced by the linker and a non-working +executable will be created. + +In order to produce code which does support interworking it should be +compiled with the + + -mthumb-interwork + +command line option. Provided that a program is made up entirely from +object files and libraries built with this command line switch a +working executable will be produced, even if both ARM and Thumb +instructions are used by the various components of the program. (No +warning messages will be produced by the linker either). + +Note that specifying -mthumb-interwork does result in slightly larger, +slower code being produced. This is why interworking support must be +specifically enabled by a switch. + + +2. Explicit interworking support for assembler files +==================================================== + +If assembler files are to be included into an interworking program +then the following rules must be obeyed: + + * Any externally visible functions must return by using the BX + instruction. + + * Normal function calls can just use the BL instruction. The + linker will automatically insert code to switch between ARM + and Thumb modes as necessary. + + * Calls via function pointers should use the BX instruction if + the call is made in ARM mode: + + .code 32 + mov lr, pc + bx rX + + This code sequence will not work in Thumb mode however, since + the mov instruction will not set the bottom bit of the lr + register. Instead a branch-and-link to the _call_via_rX + functions should be used instead: + + .code 16 + bl _call_via_rX + + where rX is replaced by the name of the register containing + the function address. + + * All externally visible functions which should be entered in + Thumb mode must have the .thumb_func pseudo op specified just + before their entry point. 
e.g.: + + .code 16 + .global function + .thumb_func + function: + ...start of function.... + + * All assembler files must be assembled with the switch + -mthumb-interwork specified on the command line. (If the file + is assembled by calling gcc it will automatically pass on the + -mthumb-interwork switch to the assembler, provided that it + was specified on the gcc command line in the first place.) + + +3. Support for old, non-interworking aware code. +================================================ + +If it is necessary to link together code produced by an older, +non-interworking aware compiler, or code produced by the new compiler +but without the -mthumb-interwork command line switch specified, then +there are two command line switches that can be used to support this. + +The switch + + -mcaller-super-interworking + +will allow calls via function pointers in Thumb mode to work, +regardless of whether the function pointer points to old, +non-interworking aware code or not. Specifying this switch does +produce slightly slower code however. + +Note: There is no switch to allow calls via function pointers in ARM +mode to be handled specially. Calls via function pointers from +interworking aware ARM code to non-interworking aware ARM code work +without any special considerations by the compiler. Calls via +function pointers from interworking aware ARM code to non-interworking +aware Thumb code however will not work. (Actually under some +circumstances they may work, but there are no guarantees). This is +because only the new compiler is able to produce Thumb code, and this +compiler already has a command line switch to produce interworking +aware code. + + +The switch + + -mcallee-super-interworking + +will allow non-interworking aware ARM or Thumb code to call Thumb +functions, either directly or via function pointers. Specifying this +switch does produce slightly larger, slower code however. + +Note: There is no switch to allow non-interworking aware ARM or Thumb +code to call ARM functions. There is no need for any special handling +of calls from non-interworking aware ARM code to interworking aware +ARM functions, they just work normally. Calls from non-interworking +aware Thumb functions to ARM code however, will not work. There is no +option to support this, since it is always possible to recompile the +Thumb code to be interworking aware. + +As an alternative to the command line switch +-mcallee-super-interworking, which affects all externally visible +functions in a file, it is possible to specify an attribute or +declspec for individual functions, indicating that that particular +function should support being called by non-interworking aware code. +The function should be defined like this: + + int __attribute__((interfacearm)) function + { + ... body of function ... + } + +or + + int __declspec(interfacearm) function + { + ... body of function ... + } + + + +4. Interworking support in dlltool +================================== + +It is possible to create DLLs containing mixed ARM and Thumb code. It +is also possible to call Thumb code in a DLL from an ARM program and +vice versa. It is even possible to call ARM DLLs that have been compiled +without interworking support (say by an older version of the compiler), +from Thumb programs and still have things work properly. 
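Before the dlltool-specific rules, a source-level picture of the call that the stub machinery protects may help. The C fragment below is not part of the original toolkit documentation; it is a minimal, hedged sketch of an indirect call, the case handled by the _call_via_rX and _interwork_call_via_rX functions described in the sections that follow.

    /* Sketch only: a callback dispatcher built as Thumb code.  The Thumb
       compiler emits the indirect call as a branch-and-link to a
       _call_via_rX stub; with -mcaller-super-interworking it uses
       _interwork_call_via_rX instead, which also copes with old,
       non-interworking aware ARM code behind the pointer.  */
    typedef int (*handler_fn) (int);

    int dispatch (handler_fn fn, int value)
    {
      return fn (value);   /* the instruction set of FN is not known here */
    }
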
+ + A version of the `dlltool' program which supports the `--interwork' +command line switch is needed, as well as the following special +considerations when building programs and DLLs: + +*Use `-mthumb-interwork'* + When compiling files for a DLL or a program the `-mthumb-interwork' + command line switch should be specified if calling between ARM and + Thumb code can happen. If a program is being compiled and the + mode of the DLLs that it uses is not known, then it should be + assumed that interworking might occur and the switch used. + +*Use `-m thumb'* + If the exported functions from a DLL are all Thumb encoded then the + `-m thumb' command line switch should be given to dlltool when + building the stubs. This will make dlltool create Thumb encoded + stubs, rather than its default of ARM encoded stubs. + + If the DLL consists of both exported Thumb functions and exported + ARM functions then the `-m thumb' switch should not be used. + Instead the Thumb functions in the DLL should be compiled with the + `-mcallee-super-interworking' switch, or with the `interfacearm' + attribute specified on their prototypes. In this way they will be + given ARM encoded prologues, which will work with the ARM encoded + stubs produced by dlltool. + +*Use `-mcaller-super-interworking'* + If it is possible for Thumb functions in a DLL to call + non-interworking aware code via a function pointer, then the Thumb + code must be compiled with the `-mcaller-super-interworking' + command line switch. This will force the function pointer calls + to use the _interwork_call_via_rX stub functions which will + correctly restore Thumb mode upon return from the called function. + +*Link with `libgcc.a'* + When the dll is built it may have to be linked with the GCC + library (`libgcc.a') in order to extract the _call_via_rX functions + or the _interwork_call_via_rX functions. This represents a partial + redundancy since the same functions *may* be present in the + application itself, but since they only take up 372 bytes this + should not be too much of a consideration. + +*Use `--support-old-code'* + When linking a program with an old DLL which does not support + interworking, the `--support-old-code' command line switch to the + linker should be used. This causes the linker to generate special + interworking stubs which can cope with old, non-interworking aware + ARM code, at the cost of generating bulkier code. The linker will + still generate a warning message along the lines of: + "Warning: input file XXX does not support interworking, whereas YYY does." + but this can now be ignored because the --support-old-code switch + has been used. + + + +5. How interworking support works +================================= + +Switching between the ARM and Thumb instruction sets is accomplished +via the BX instruction which takes as an argument a register name. +Control is transferred to the address held in this register (with the +bottom bit masked out), and if the bottom bit is set, then Thumb +instruction processing is enabled, otherwise ARM instruction +processing is enabled. + +When the -mthumb-interwork command line switch is specified, gcc +arranges for all functions to return to their caller by using the BX +instruction. Thus provided that the return address has the bottom bit +correctly initialized to indicate the instruction set of the caller, +correct operation will ensue. + +When a function is called explicitly (rather than via a function +pointer), the compiler generates a BL instruction to do this. 
The +Thumb version of the BL instruction has the special property of +setting the bottom bit of the LR register after it has stored the +return address into it, so that a future BX instruction will correctly +return the instruction after the BL instruction, in Thumb mode. + +The BL instruction does not change modes itself however, so if an ARM +function is calling a Thumb function, or vice versa, it is necessary +to generate some extra instructions to handle this. This is done in +the linker when it is storing the address of the referenced function +into the BL instruction. If the BL instruction is an ARM style BL +instruction, but the referenced function is a Thumb function, then the +linker automatically generates a calling stub that converts from ARM +mode to Thumb mode, puts the address of this stub into the BL +instruction, and puts the address of the referenced function into the +stub. Similarly if the BL instruction is a Thumb BL instruction, and +the referenced function is an ARM function, the linker generates a +stub which converts from Thumb to ARM mode, puts the address of this +stub into the BL instruction, and the address of the referenced +function into the stub. + +This is why it is necessary to mark Thumb functions with the +.thumb_func pseudo op when creating assembler files. This pseudo op +allows the assembler to distinguish between ARM functions and Thumb +functions. (The Thumb version of GCC automatically generates these +pseudo ops for any Thumb functions that it generates). + +Calls via function pointers work differently. Whenever the address of +a function is taken, the linker examines the type of the function +being referenced. If the function is a Thumb function, then it sets +the bottom bit of the address. Technically this makes the address +incorrect, since it is now one byte into the start of the function, +but this is never a problem because: + + a. with interworking enabled all calls via function pointer + are done using the BX instruction and this ignores the + bottom bit when computing where to go to. + + b. the linker will always set the bottom bit when the address + of the function is taken, so it is never possible to take + the address of the function in two different places and + then compare them and find that they are not equal. + +As already mentioned any call via a function pointer will use the BX +instruction (provided that interworking is enabled). The only problem +with this is computing the return address for the return from the +called function. For ARM code this can easily be done by the code +sequence: + + mov lr, pc + bx rX + +(where rX is the name of the register containing the function +pointer). This code does not work for the Thumb instruction set, +since the MOV instruction will not set the bottom bit of the LR +register, so that when the called function returns, it will return in +ARM mode not Thumb mode. Instead the compiler generates this +sequence: + + bl _call_via_rX + +(again where rX is the name if the register containing the function +pointer). The special call_via_rX functions look like this: + + .thumb_func +_call_via_r0: + bx r0 + nop + +The BL instruction ensures that the correct return address is stored +in the LR register and then the BX instruction jumps to the address +stored in the function pointer, switch modes if necessary. + + +6. 
How caller-super-interworking support works +============================================== + +When the -mcaller-super-interworking command line switch is specified +it changes the code produced by the Thumb compiler so that all calls +via function pointers (including virtual function calls) now go via a +different stub function. The code to call via a function pointer now +looks like this: + + bl _interwork_call_via_r0 + +Note: The compiler does not insist that r0 be used to hold the +function address. Any register will do, and there are a suite of stub +functions, one for each possible register. The stub functions look +like this: + + .code 16 + .thumb_func +_interwork_call_via_r0 + bx pc + nop + + .code 32 + tst r0, #1 + stmeqdb r13!, {lr} + adreq lr, _arm_return + bx r0 + +The stub first switches to ARM mode, since it is a lot easier to +perform the necessary operations using ARM instructions. It then +tests the bottom bit of the register containing the address of the +function to be called. If this bottom bit is set then the function +being called uses Thumb instructions and the BX instruction to come +will switch back into Thumb mode before calling this function. (Note +that it does not matter how this called function chooses to return to +its caller, since the both the caller and callee are Thumb functions, +and mode switching is necessary). If the function being called is an +ARM mode function however, the stub pushes the return address (with +its bottom bit set) onto the stack, replaces the return address with +the address of the a piece of code called '_arm_return' and then +performs a BX instruction to call the function. + +The '_arm_return' code looks like this: + + .code 32 +_arm_return: + ldmia r13!, {r12} + bx r12 + .code 16 + + +It simply retrieves the return address from the stack, and then +performs a BX operation to return to the caller and switch back into +Thumb mode. + + +7. How callee-super-interworking support works +============================================== + +When -mcallee-super-interworking is specified on the command line the +Thumb compiler behaves as if every externally visible function that it +compiles has had the (interfacearm) attribute specified for it. What +this attribute does is to put a special, ARM mode header onto the +function which forces a switch into Thumb mode: + + without __attribute__((interfacearm)): + + .code 16 + .thumb_func + function: + ... start of function ... + + with __attribute__((interfacearm)): + + .code 32 + function: + orr r12, pc, #1 + bx r12 + + .code 16 + .thumb_func + .real_start_of_function: + + ... start of function ... + +Note that since the function now expects to be entered in ARM mode, it +no longer has the .thumb_func pseudo op specified for its name. +Instead the pseudo op is attached to a new label .real_start_of_ +(where is the name of the function) which indicates the start +of the Thumb code. This does have the interesting side effect in that +if this function is now called from a Thumb mode piece of code +outside of the current file, the linker will generate a calling stub +to switch from Thumb mode into ARM mode, and then this is immediately +overridden by the function's header which switches back into Thumb +mode. + +In addition the (interfacearm) attribute also forces the function to +return by using the BX instruction, even if has not been compiled with +the -mthumb-interwork command line flag, so that the correct mode will +be restored upon exit from the function. + + +8. 
Some examples +================ + + Given these two test files: + + int arm (void) { return 1 + thumb (); } + + int thumb (void) { return 2 + arm (); } + + The following pieces of assembler are produced by the ARM and Thumb +version of GCC depending upon the command line options used: + + `-O2': + .code 32 .code 16 + .global _arm .global _thumb + .thumb_func + _arm: _thumb: + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, pc} pop {pc} + + Note how the functions return without using the BX instruction. If +these files were assembled and linked together they would fail to work +because they do not change mode when returning to their caller. + + `-O2 -mthumb-interwork': + + .code 32 .code 16 + .global _arm .global _thumb + .thumb_func + _arm: _thumb: + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, lr} pop {r1} + bx lr bx r1 + + Now the functions use BX to return their caller. They have grown by +4 and 2 bytes respectively, but they can now successfully be linked +together and be expect to work. The linker will replace the +destinations of the two BL instructions with the addresses of calling +stubs which convert to the correct mode before jumping to the called +function. + + `-O2 -mcallee-super-interworking': + + .code 32 .code 32 + .global _arm .global _thumb + _arm: _thumb: + orr r12, pc, #1 + bx r12 + mov ip, sp .code 16 + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, lr} pop {r1} + bx lr bx r1 + + The thumb function now has an ARM encoded prologue, and it no longer +has the `.thumb-func' pseudo op attached to it. The linker will not +generate a calling stub for the call from arm() to thumb(), but it will +still have to generate a stub for the call from thumb() to arm(). Also +note how specifying `--mcallee-super-interworking' automatically +implies `-mthumb-interworking'. + + +9. Some Function Pointer Examples +================================= + + Given this test file: + + int func (void) { return 1; } + + int call (int (* ptr)(void)) { return ptr (); } + + The following varying pieces of assembler are produced by the Thumb +version of GCC depending upon the command line options used: + + `-O2': + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __call_via_r0 + pop {pc} + + Note how the two functions have different exit sequences. In +particular call() uses pop {pc} to return, which would not work if the +caller was in ARM mode. func() however, uses the BX instruction, even +though `-mthumb-interwork' has not been specified, as this is the most +efficient way to exit a function when the return address is held in the +link register. + + `-O2 -mthumb-interwork': + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + This time both functions return by using the BX instruction. This +means that call() is now two bytes longer and several cycles slower +than the previous version. 
+ + `-O2 -mcaller-super-interworking': + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __interwork_call_via_r0 + pop {pc} + + Very similar to the first (non-interworking) version, except that a +different stub is used to call via the function pointer. This new stub +will work even if the called function is not interworking aware, and +tries to return to call() in ARM mode. Note that the assembly code for +call() is still not interworking aware itself, and so should not be +called from ARM code. + + `-O2 -mcallee-super-interworking': + + .code 32 + .globl _func + _func: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_func + .thumb_func + .real_start_of_func: + mov r0, #1 + bx lr + + .code 32 + .globl _call + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + Now both functions have an ARM coded prologue, and both functions +return by using the BX instruction. These functions are interworking +aware therefore and can safely be called from ARM code. The code for +the call() function is now 10 bytes longer than the original, non +interworking aware version, an increase of over 200%. + + If a prototype for call() is added to the source code, and this +prototype includes the `interfacearm' attribute: + + int __attribute__((interfacearm)) call (int (* ptr)(void)); + + then this code is produced (with only -O2 specified on the command +line): + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .code 32 + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + So now both call() and func() can be safely called via +non-interworking aware ARM code. If, when such a file is assembled, +the assembler detects the fact that call() is being called by another +function in the same file, it will automatically adjust the target of +the BL instruction to point to .real_start_of_call. In this way there +is no need for the linker to generate a Thumb-to-ARM calling stub so +that call can be entered in ARM mode. + + +10. How to use dlltool to build ARM/Thumb DLLs +============================================== + Given a program (`prog.c') like this: + + extern int func_in_dll (void); + + int main (void) { return func_in_dll(); } + + And a DLL source file (`dll.c') like this: + + int func_in_dll (void) { return 1; } + + Here is how to build the DLL and the program for a purely ARM based +environment: + +*Step One + Build a `.def' file describing the DLL: + + ; example.def + ; This file describes the contents of the DLL + LIBRARY example + HEAPSIZE 0x40000, 0x2000 + EXPORTS + func_in_dll 1 + +*Step Two + Compile the DLL source code: + + arm-pe-gcc -O2 -c dll.c + +*Step Three + Use `dlltool' to create an exports file and a library file: + + dlltool --def example.def --output-exp example.o --output-lib example.a + +*Step Four + Link together the complete DLL: + + arm-pe-ld dll.o example.o -o example.dll + +*Step Five + Compile the program's source code: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file: + + arm-pe-gcc prog.o example.a -o prog + + If instead this was a Thumb DLL being called from an ARM program, the +steps would look like this. 
(To save space only those steps that are +different from the previous version are shown): + +*Step Two + Compile the DLL source code (using the Thumb compiler): + + thumb-pe-gcc -O2 -c dll.c -mthumb-interwork + +*Step Three + Build the exports and library files (and support interworking): + + dlltool -d example.def -z example.o -l example.a --interwork -m thumb + +*Step Five + Compile the program's source code (and support interworking): + + arm-pe-gcc -O2 -c prog.c -mthumb-interwork + + If instead, the DLL was an old, ARM DLL which does not support +interworking, and which cannot be rebuilt, then these steps would be +used. + +*Step One + Skip. If you do not have access to the sources of a DLL, there is + no point in building a `.def' file for it. + +*Step Two + Skip. With no DLL sources there is nothing to compile. + +*Step Three + Skip. Without a `.def' file you cannot use dlltool to build an + exports file or a library file. + +*Step Four + Skip. Without a set of DLL object files you cannot build the DLL. + Besides it has already been built for you by somebody else. + +*Step Five + Compile the program's source code, this is the same as before: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file, passing the + `--support-old-code' option to the linker: + + arm-pe-gcc prog.o example.a -Wl,--support-old-code -o prog + + Ignore the warning message about the input file not supporting + interworking as the --support-old-code switch has taken care if this. + + +Copyright (C) 1998-2014 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. diff --git a/gcc-4.9/gcc/config/arm/aarch-common-protos.h b/gcc-4.9/gcc/config/arm/aarch-common-protos.h new file mode 100644 index 000000000..a5ff6b4f9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-common-protos.h @@ -0,0 +1,134 @@ +/* Functions and structures shared between arm and aarch64. + + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH_COMMON_PROTOS_H +#define GCC_AARCH_COMMON_PROTOS_H + +extern int arm_early_load_addr_dep (rtx, rtx); +extern int arm_early_store_addr_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); +extern int arm_mac_accumulator_is_result (rtx, rtx); +extern int arm_no_early_alu_shift_dep (rtx, rtx); +extern int arm_no_early_alu_shift_value_dep (rtx, rtx); +extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_no_early_store_addr_dep (rtx, rtx); +extern bool arm_rtx_shift_left_p (rtx); + +/* RTX cost table definitions. These are used when tuning for speed rather + than for size and should reflect the _additional_ cost over the cost + of the fastest instruction in the machine, which is COSTS_N_INSNS (1). 
+ Therefore it's okay for some costs to be 0. + Costs may not have a negative value. */ +struct alu_cost_table +{ + const int arith; /* ADD/SUB. */ + const int logical; /* AND/ORR/EOR/BIC, etc. */ + const int shift; /* Simple shift. */ + const int shift_reg; /* Simple shift by reg. */ + const int arith_shift; /* Additional when arith also shifts... */ + const int arith_shift_reg; /* ... and when the shift is by a reg. */ + const int log_shift; /* Additional when logic also shifts... */ + const int log_shift_reg; /* ... and when the shift is by a reg. */ + const int extend; /* Zero/sign extension. */ + const int extend_arith; /* Extend and arith. */ + const int bfi; /* Bit-field insert. */ + const int bfx; /* Bit-field extraction. */ + const int clz; /* Count Leading Zeros. */ + const int non_exec; /* Extra cost when not executing insn. */ + const bool non_exec_costs_exec; /* True if non-execution must add the exec + cost. */ +}; + +struct mult_cost_table +{ + const int simple; + const int flag_setting; /* Additional cost if multiply sets flags. */ + const int extend; + const int add; + const int extend_add; + const int idiv; +}; + +/* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. + Similarly for STM operations. + Therefore the ldm_regs_per_insn_1st/stm_regs_per_insn_1st and + ldm_regs_per_insn_subsequent/stm_regs_per_insn_subsequent fields indicate + the number of registers loaded/stored and are expressed by a simple integer + and not by a COSTS_N_INSNS (N) expression. + */ +struct mem_cost_table +{ + const int load; + const int load_sign_extend; /* Additional to load cost. */ + const int ldrd; /* Cost of LDRD. */ + const int ldm_1st; + const int ldm_regs_per_insn_1st; + const int ldm_regs_per_insn_subsequent; + const int loadf; /* SFmode. */ + const int loadd; /* DFmode. */ + const int load_unaligned; /* Extra for unaligned loads. */ + const int store; + const int strd; + const int stm_1st; + const int stm_regs_per_insn_1st; + const int stm_regs_per_insn_subsequent; + const int storef; /* SFmode. */ + const int stored; /* DFmode. */ + const int store_unaligned; /* Extra for unaligned stores. */ +}; + +struct fp_cost_table +{ + const int div; + const int mult; + const int mult_addsub; /* Non-fused. */ + const int fma; /* Fused. */ + const int addsub; + const int fpconst; /* Immediate. */ + const int neg; /* NEG and ABS. */ + const int compare; + const int widen; /* Widen to this size. */ + const int narrow; /* Narrow from this size. */ + const int toint; + const int fromint; + const int roundint; /* V8 round to integral, remains FP format. */ +}; + +struct vector_cost_table +{ + const int alu; +}; + +struct cpu_cost_table +{ + const struct alu_cost_table alu; + const struct mult_cost_table mult[2]; /* SImode and DImode. */ + const struct mem_cost_table ldst; + const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ + const struct vector_cost_table vect; +}; + + +#endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/aarch-common.c b/gcc-4.9/gcc/config/arm/aarch-common.c new file mode 100644 index 000000000..c11f7e954 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-common.c @@ -0,0 +1,353 @@ +/* Dependency checks for instruction scheduling, shared between ARM and + AARCH64. + + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. 
+ + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "rtl.h" +#include "tree.h" +#include "c-family/c-common.h" +#include "rtl.h" + +typedef struct +{ + rtx_code search_code; + rtx search_result; + bool find_any_shift; +} search_term; + +/* Return TRUE if X is either an arithmetic shift left, or + is a multiplication by a power of two. */ +bool +arm_rtx_shift_left_p (rtx x) +{ + enum rtx_code code = GET_CODE (x); + + if (code == MULT && CONST_INT_P (XEXP (x, 1)) + && exact_log2 (INTVAL (XEXP (x, 1))) > 0) + return true; + + if (code == ASHIFT) + return true; + + return false; +} + +static rtx_code shift_rtx_codes[] = + { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, + ROTATERT, ZERO_EXTEND, SIGN_EXTEND }; + +/* Callback function for arm_find_sub_rtx_with_code. + DATA is safe to treat as a SEARCH_TERM, ST. This will + hold a SEARCH_CODE. PATTERN is checked to see if it is an + RTX with that code. If it is, write SEARCH_RESULT in ST + and return 1. Otherwise, or if we have been passed a NULL_RTX + return 0. If ST.FIND_ANY_SHIFT then we are interested in + anything which can reasonably be described as a SHIFT RTX. */ +static int +arm_find_sub_rtx_with_search_term (rtx *pattern, void *data) +{ + search_term *st = (search_term *) data; + rtx_code pattern_code; + int found = 0; + + gcc_assert (pattern); + gcc_assert (st); + + /* Poorly formed patterns can really ruin our day. */ + if (*pattern == NULL_RTX) + return 0; + + pattern_code = GET_CODE (*pattern); + + if (st->find_any_shift) + { + unsigned i = 0; + + /* Left shifts might have been canonicalized to a MULT of some + power of two. Make sure we catch them. */ + if (arm_rtx_shift_left_p (*pattern)) + found = 1; + else + for (i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++) + if (pattern_code == shift_rtx_codes[i]) + found = 1; + } + + if (pattern_code == st->search_code) + found = 1; + + if (found) + st->search_result = *pattern; + + return found; +} + +/* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE. */ +static rtx +arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift) +{ + search_term st; + int result = 0; + + gcc_assert (pattern != NULL_RTX); + st.search_code = code; + st.search_result = NULL_RTX; + st.find_any_shift = find_any_shift; + result = for_each_rtx (&pattern, arm_find_sub_rtx_with_search_term, &st); + if (result) + return st.search_result; + else + return NULL_RTX; +} + +/* Traverse PATTERN looking for any sub-rtx which looks like a shift. */ +static rtx +arm_find_shift_sub_rtx (rtx pattern) +{ + return arm_find_sub_rtx_with_code (pattern, ASHIFT, true); +} + +/* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER + (possibly) contains a SET which will provide a result we can access + using the SET_DEST macro. We will place the RTX which would be + written by PRODUCER in SET_SOURCE. 
+ Similarly, CONSUMER (possibly) contains a SET which has an operand + we can access using SET_SRC. We place this operand in + SET_DESTINATION. + + Return nonzero if we found the SET RTX we expected. */ +static int +arm_get_set_operands (rtx producer, rtx consumer, + rtx *set_source, rtx *set_destination) +{ + rtx set_producer = arm_find_sub_rtx_with_code (producer, SET, false); + rtx set_consumer = arm_find_sub_rtx_with_code (consumer, SET, false); + + if (set_producer && set_consumer) + { + *set_source = SET_DEST (set_producer); + *set_destination = SET_SRC (set_consumer); + return 1; + } + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a load) does need + PRODUCER's value to calculate the address. */ +int +arm_early_load_addr_dep (rtx producer, rtx consumer) +{ + rtx value, addr; + + if (!arm_get_set_operands (producer, consumer, &value, &addr)) + return 0; + + return reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value or amount dependency on the + result of PRODUCER. */ +int +arm_no_early_alu_shift_dep (rtx producer, rtx consumer) +{ + rtx value, op; + rtx early_op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if ((early_op = arm_find_shift_sub_rtx (op))) + { + if (REG_P (early_op)) + early_op = op; + + return !reg_overlap_mentioned_p (value, early_op); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value dependency on the result of + PRODUCER. */ +int +arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) +{ + rtx value, op; + rtx early_op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if ((early_op = arm_find_shift_sub_rtx (op))) + /* We want to check the value being shifted. */ + if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0))) + return 1; + + return 0; +} + +/* Return nonzero if the CONSUMER (a mul or mac op) does not + have an early register mult dependency on the result of + PRODUCER. */ +int +arm_no_early_mul_dep (rtx producer, rtx consumer) +{ + rtx value, op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a store) does not need + PRODUCER's value to calculate the address. */ + +int +arm_no_early_store_addr_dep (rtx producer, rtx consumer) +{ + rtx value = arm_find_sub_rtx_with_code (producer, SET, false); + rtx addr = arm_find_sub_rtx_with_code (consumer, SET, false); + + if (value) + value = SET_DEST (value); + + if (addr) + addr = SET_DEST (addr); + + if (!value || !addr) + return 0; + + return !reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (a store) does need + PRODUCER's value to calculate the address. */ + +int +arm_early_store_addr_dep (rtx producer, rtx consumer) +{ + return !arm_no_early_store_addr_dep (producer, consumer); +} + +/* Return non-zero iff the consumer (a multiply-accumulate or a + multiple-subtract instruction) has an accumulator dependency on the + result of the producer and no other dependency on that result. It + does not check if the producer is multiply-accumulate instruction. 
*/ +int +arm_mac_accumulator_is_result (rtx producer, rtx consumer) +{ + rtx result; + rtx op0, op1, acc; + + producer = PATTERN (producer); + consumer = PATTERN (consumer); + + if (GET_CODE (producer) == COND_EXEC) + producer = COND_EXEC_CODE (producer); + if (GET_CODE (consumer) == COND_EXEC) + consumer = COND_EXEC_CODE (consumer); + + if (GET_CODE (producer) != SET) + return 0; + + result = XEXP (producer, 0); + + if (GET_CODE (consumer) != SET) + return 0; + + /* Check that the consumer is of the form + (set (...) (plus (mult ...) (...))) + or + (set (...) (minus (...) (mult ...))). */ + if (GET_CODE (XEXP (consumer, 1)) == PLUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); + acc = XEXP (XEXP (consumer, 1), 1); + } + else if (GET_CODE (XEXP (consumer, 1)) == MINUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); + acc = XEXP (XEXP (consumer, 1), 0); + } + else + return 0; + + return (reg_overlap_mentioned_p (result, acc) + && !reg_overlap_mentioned_p (result, op0) + && !reg_overlap_mentioned_p (result, op1)); +} + +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. */ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). */ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} diff --git a/gcc-4.9/gcc/config/arm/aarch-cost-tables.h b/gcc-4.9/gcc/config/arm/aarch-cost-tables.h new file mode 100644 index 000000000..c30ea2f92 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-cost-tables.h @@ -0,0 +1,325 @@ +/* RTX cost tables shared between arm and aarch64. + + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_AARCH_COST_TABLES_H +#define GCC_AARCH_COST_TABLES_H + +const struct cpu_cost_table generic_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + 0, /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (3), /* extend_add. */ + COSTS_N_INSNS (8) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + COSTS_N_INSNS (2), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 1, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (3), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa53_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ + COSTS_N_INSNS (2), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (2), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (2), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (2), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (7) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (2), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (15) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* load. */ + COSTS_N_INSNS (1), /* load_sign_extend. 
*/ + COSTS_N_INSNS (1), /* ldrd. */ + COSTS_N_INSNS (1), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* loadf. */ + COSTS_N_INSNS (1), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (30), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa57_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (4), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (4), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (34) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + COSTS_N_INSNS (5), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (9), /* mult_addsub. */ + COSTS_N_INSNS (9), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. 
*/ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (9), /* mult_addsub. */ + COSTS_N_INSNS (9), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +#endif /* GCC_AARCH_COST_TABLES_H */ diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h new file mode 100644 index 000000000..51d32a9d4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aout.h @@ -0,0 +1,303 @@ +/* Definitions of target machine for GNU compiler, for ARM with a.out + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@armltd.co.uk). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ASM_APP_ON +#define ASM_APP_ON "" +#endif +#ifndef ASM_APP_OFF +#define ASM_APP_OFF "" +#endif + +/* Switch to the text or data segment. */ +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +/* Note: If USER_LABEL_PREFIX or LOCAL_LABEL_PREFIX are changed, + make sure that this change is reflected in the function + coff_arm_is_local_label_name() in bfd/coff-arm.c. */ +#ifndef REGISTER_PREFIX +#define REGISTER_PREFIX "" +#endif + +#ifndef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "" +#endif + +/* The assembler's names for the registers. Note that the ?xx registers are + there so that VFPv3/NEON registers D16-D31 have the same spacing as D0-D15 + (each of which is overlaid on two S registers), although there are no + actual single-precision registers which correspond to D16-D31. 
*/ +#ifndef REGISTER_NAMES +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc", \ + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", \ + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \ + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \ + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \ + "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \ + "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \ + "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \ + "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \ + "wr0", "wr1", "wr2", "wr3", \ + "wr4", "wr5", "wr6", "wr7", \ + "wr8", "wr9", "wr10", "wr11", \ + "wr12", "wr13", "wr14", "wr15", \ + "wcgr0", "wcgr1", "wcgr2", "wcgr3", \ + "cc", "vfpcc", "sfp", "afp" \ +} +#endif + +#ifndef ADDITIONAL_REGISTER_NAMES +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"a1", 0}, \ + {"a2", 1}, \ + {"a3", 2}, \ + {"a4", 3}, \ + {"v1", 4}, \ + {"v2", 5}, \ + {"v3", 6}, \ + {"v4", 7}, \ + {"v5", 8}, \ + {"v6", 9}, \ + {"rfp", 9}, /* Historical. */ \ + {"sb", 9}, /* Historical. */ \ + {"v7", 10}, \ + {"sl", 10}, /* Historical. */ \ + {"r11", 11}, /* fp */ \ + {"r12", 12}, /* ip */ \ + {"r13", 13}, /* sp */ \ + {"r14", 14}, /* lr */ \ + {"r15", 15} /* pc */ \ +} +#endif + +#ifndef OVERLAPPING_REGISTER_NAMES +#define OVERLAPPING_REGISTER_NAMES \ +{ \ + {"d0", FIRST_VFP_REGNUM + 0, 2}, \ + {"d1", FIRST_VFP_REGNUM + 2, 2}, \ + {"d2", FIRST_VFP_REGNUM + 4, 2}, \ + {"d3", FIRST_VFP_REGNUM + 6, 2}, \ + {"d4", FIRST_VFP_REGNUM + 8, 2}, \ + {"d5", FIRST_VFP_REGNUM + 10, 2}, \ + {"d6", FIRST_VFP_REGNUM + 12, 2}, \ + {"d7", FIRST_VFP_REGNUM + 14, 2}, \ + {"d8", FIRST_VFP_REGNUM + 16, 2}, \ + {"d9", FIRST_VFP_REGNUM + 18, 2}, \ + {"d10", FIRST_VFP_REGNUM + 20, 2}, \ + {"d11", FIRST_VFP_REGNUM + 22, 2}, \ + {"d12", FIRST_VFP_REGNUM + 24, 2}, \ + {"d13", FIRST_VFP_REGNUM + 26, 2}, \ + {"d14", FIRST_VFP_REGNUM + 28, 2}, \ + {"d15", FIRST_VFP_REGNUM + 30, 2}, \ + {"q0", FIRST_VFP_REGNUM + 0, 4}, \ + {"q1", FIRST_VFP_REGNUM + 4, 4}, \ + {"q2", FIRST_VFP_REGNUM + 8, 4}, \ + {"q3", FIRST_VFP_REGNUM + 12, 4}, \ + {"q4", FIRST_VFP_REGNUM + 16, 4}, \ + {"q5", FIRST_VFP_REGNUM + 20, 4}, \ + {"q6", FIRST_VFP_REGNUM + 24, 4}, \ + {"q7", FIRST_VFP_REGNUM + 28, 4}, \ + {"q8", FIRST_VFP_REGNUM + 32, 4}, \ + {"q9", FIRST_VFP_REGNUM + 36, 4}, \ + {"q10", FIRST_VFP_REGNUM + 40, 4}, \ + {"q11", FIRST_VFP_REGNUM + 44, 4}, \ + {"q12", FIRST_VFP_REGNUM + 48, 4}, \ + {"q13", FIRST_VFP_REGNUM + 52, 4}, \ + {"q14", FIRST_VFP_REGNUM + 56, 4}, \ + {"q15", FIRST_VFP_REGNUM + 60, 4} \ +} +#endif + +#ifndef NO_DOLLAR_IN_LABEL +#define NO_DOLLAR_IN_LABEL 1 +#endif + +/* Generate DBX debugging information. riscix.h will undefine this because + the native assembler does not support stabs. */ +#define DBX_DEBUGGING_INFO 1 + +/* Acorn dbx moans about continuation chars, so don't use any. */ +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Output a function label definition. */ +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + } \ + while (0) +#endif + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* Make an internal label into a string. 
*/ +#ifndef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM)) +#endif + +/* Output an element of a dispatch table. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + do \ + { \ + gcc_assert (!TARGET_THUMB2); \ + asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \ + } \ + while (0) + + +/* Thumb-2 always uses addr_diff_elf so that the Table Branch instructions + can be used. For non-pic code where the offsets do not suitable for + TBB/TBH the elements are output as absolute labels. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \ + else if (TARGET_THUMB1) \ + { \ + if (flag_pic || optimize_size) \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + } \ + else /* Thumb-2 */ \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: /* TBB */ \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + if (flag_pic) \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1-%LL%d\n", VALUE, REL); \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + } \ + while (0) + + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN) \ + output_ascii_pseudo_op (STREAM, (const unsigned char *) (PTR), LEN) + +/* Output a gap. In fact we fill it with nulls. */ +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \ + fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES)) + +/* Align output to a power of two. Horrible /bin/as. */ +#ifndef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + register int amount = 1 << (POWER); \ + \ + if (amount == 2) \ + fprintf (STREAM, "\t.even\n"); \ + else if (amount != 1) \ + fprintf (STREAM, "\t.align\t%d\n", amount - 4); \ + } \ + while (0) +#endif + +/* Output a common block. */ +#ifndef ASM_OUTPUT_COMMON +#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \ + do \ + { \ + fprintf (STREAM, "\t.comm\t"); \ + assemble_name (STREAM, NAME); \ + asm_fprintf (STREAM, ", %d\t%@ %d\n", \ + (int)(ROUNDED), (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a local common block. /bin/as can't do this, so hack a + `.space' into the bss segment. Note that this is *bad* practice, + which is guaranteed NOT to work since it doesn't define STATIC + COMMON space but merely STATIC BSS space. */ +#ifndef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ + do \ + { \ + switch_to_section (bss_section); \ + ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a zero-initialized block. 
*/ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(STREAM, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (STREAM, DECL, NAME, SIZE, ALIGN) +#endif + +#ifndef ASM_COMMENT_START +#define ASM_COMMENT_START "@" +#endif + +/* This works for GAS and some other assemblers. */ +#define SET_ASM_OP "\t.set\t" diff --git a/gcc-4.9/gcc/config/arm/arm-arches.def b/gcc-4.9/gcc/config/arm/arm-arches.def new file mode 100644 index 000000000..9adb791db --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-arches.def @@ -0,0 +1,60 @@ +/* ARM CPU architectures. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_ARCH(NAME, CORE, ARCH, FLAGS) + + The NAME is the name of the architecture, represented as a string + constant. The CORE is the identifier for a core representative of + this architecture. ARCH is the architecture revision. FLAGS are + the flags implied by the architecture. + + genopt.sh assumes no whitespace up to the first "," in each entry. */ + +ARM_ARCH("armv2", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2) +ARM_ARCH("armv2a", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2) +ARM_ARCH("armv3", arm6, 3, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3) +ARM_ARCH("armv3m", arm7m, 3M, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M) +ARM_ARCH("armv4", arm7tdmi, 4, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4) +/* Strictly, FL_MODE26 is a permitted option for v4t, but there are no + implementations that support it, so we will leave it out for now. 
*/ +ARM_ARCH("armv4t", arm7tdmi, 4T, FL_CO_PROC | FL_FOR_ARCH4T) +ARM_ARCH("armv5", arm10tdmi, 5, FL_CO_PROC | FL_FOR_ARCH5) +ARM_ARCH("armv5t", arm10tdmi, 5T, FL_CO_PROC | FL_FOR_ARCH5T) +ARM_ARCH("armv5e", arm1026ejs, 5E, FL_CO_PROC | FL_FOR_ARCH5E) +ARM_ARCH("armv5te", arm1026ejs, 5TE, FL_CO_PROC | FL_FOR_ARCH5TE) +ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6) +ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J) +ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K) +ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z) +ARM_ARCH("armv6zk", arm1176jzs, 6ZK, FL_CO_PROC | FL_FOR_ARCH6ZK) +ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2) +ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M) +ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M) +ARM_ARCH("armv7", cortexa8, 7, FL_CO_PROC | FL_FOR_ARCH7) +ARM_ARCH("armv7-a", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7A) +ARM_ARCH("armv7ve", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7VE) +ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R) +ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M) +ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM) +ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A) +ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A) +ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT) +ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2) diff --git a/gcc-4.9/gcc/config/arm/arm-c.c b/gcc-4.9/gcc/config/arm/arm-c.c new file mode 100644 index 000000000..af64f7a1f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-c.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "tree.h" +#include "c-family/c-common.h" + +/* Output C specific EABI object attributes. These can not be done in + arm.c because they require information from the C frontend. */ + +static void +arm_output_c_attributes (void) +{ + int wchar_size = (int)(TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT); + arm_emit_eabi_attribute ("Tag_ABI_PCS_wchar_t", 18, wchar_size); +} + + +/* Setup so that common code calls arm_output_c_attributes. */ + +void +arm_lang_object_attributes_init (void) +{ + arm_lang_output_object_attributes_hook = arm_output_c_attributes; +} diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def new file mode 100644 index 000000000..42f00b463 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-cores.def @@ -0,0 +1,159 @@ +/* ARM CPU Cores + Copyright (C) 2003-2014 Free Software Foundation, Inc. + Written by CodeSourcery, LLC + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_CORE(CORE_NAME, INTERNAL_IDENT, TUNE_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The INTERNAL_IDENT is the name of the core represented as an identifier. + This must be unique for each entry in this table. + The TUNE_IDENT is the name of the core for which scheduling decisions + should be made, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. + + If you update this table, you must update the "tune" attribute in + arm.md. + + Some tools assume no whitespace up to the first "," in each entry. */ + +/* V2/V2A Architecture Processors */ +ARM_CORE("arm2", arm2, arm2, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm250", arm250, arm250, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm3", arm3, arm3, 2, FL_CO_PROC | FL_MODE26, slowmul) + +/* V3 Architecture Processors */ +ARM_CORE("arm6", arm6, arm6, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm60", arm60, arm60, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm600", arm600, arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm610", arm610, arm610, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm620", arm620, arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7", arm7, arm7, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7d", arm7d, arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7di", arm7di, arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm70", arm70, arm70, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm700", arm700, arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm700i", arm700i, arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710", arm710, arm710, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm720", arm720, arm720, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710c", arm710c, arm710c, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7100", arm7100, arm7100, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7500", arm7500, arm7500, 3, FL_MODE26 | FL_WBUF, slowmul) +/* Doesn't have an external co-proc, but does have embedded fpa. */ +ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) + +/* V3M Architecture Processors */ +/* arm7m doesn't exist on its own, but only with D, ("and", and I), but + those don't alter the code, so arm7m is sometimes used. 
*/ +ARM_CORE("arm7m", arm7m, arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul) + +/* V4 Architecture Processors */ +ARM_CORE("arm8", arm8, arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("arm810", arm810, arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("strongarm", strongarm, strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm110", strongarm110, strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("fa526", fa526, fa526, 4, FL_LDSCHED, fastmul) +ARM_CORE("fa626", fa626, fa626, 4, FL_LDSCHED, fastmul) + +/* V4T Architecture Processors */ +ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, FL_CO_PROC, fastmul) +ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, FL_CO_PROC, fastmul) +ARM_CORE("arm710t", arm710t, arm710t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm720t", arm720t, arm720t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm740t", arm740t, arm740t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm9", arm9, arm9, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920", arm920, arm920, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920t", arm920t, arm920t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm922t", arm922t, arm922t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm940t", arm940t, arm940t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("ep9312", ep9312, ep9312, 4T, FL_LDSCHED, fastmul) + +/* V5T Architecture Processors */ +ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, FL_LDSCHED, fastmul) +ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, FL_LDSCHED, fastmul) + +/* V5TE Architecture Processors */ +ARM_CORE("arm9e", arm9e, arm9e, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm10e", arm10e, arm10e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("xscale", xscale, xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) +ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) +ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale) +ARM_CORE("fa606te", fa606te, fa606te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa626te", fa626te, fa626te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fmp626", fmp626, fmp626, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa726te", fa726te, fa726te, 5TE, FL_LDSCHED, fa726te) + +/* V5TEJ Architecture Processors */ +ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, FL_LDSCHED, 9e) +ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e) + +/* V6 Architecture Processors */ +ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e) +ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6ZK, FL_LDSCHED, 9e) +ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e) +ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | 
FL_VFPV2, 9e) +ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2) +ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) + +/* V6M Architecture Processors */ +ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, FL_LDSCHED, v6m) + +/* V7 Architecture Processors */ +ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) +ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) +ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) +ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) +ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, FL_LDSCHED, cortex) +ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex) +ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) +ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) +ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) +ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) +ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) + +/* V7 big.LITTLE implementations */ +ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + +/* V8 Architecture Processors */ +ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) +ARM_CORE("cortex-a57", cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) + +/* V8 big.LITTLE implementations */ +ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc-4.9/gcc/config/arm/arm-fixed.md b/gcc-4.9/gcc/config/arm/arm-fixed.md new file mode 100644 index 000000000..4ab9d3597 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-fixed.md @@ -0,0 +1,429 @@ +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; This file contains ARM instructions that support fixed-point operations. 
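The patterns in this file implement GCC's fixed-point arithmetic (the ISO/IEC TR 18037 _Fract/_Accum types and their _Sat variants) for ARM; which pattern a given operation expands through is driven by the FIXED/ADDSUB/QADDSUB mode iterators defined with the other ARM iterators. As a rough illustration only (the function names are made up, and it assumes a toolchain built with fixed-point support), C source like the following is what ends up going through the wrapping and saturating add patterns below:

/* Wrapping fixed-point add: expected to go through the plain add
   pattern below.  */
short _Fract
wrap_add (short _Fract a, short _Fract b)
{
  return a + b;
}

/* Saturating fixed-point add: expected to go through one of the
   ss_plus (qadd-style) patterns below on cores with the saturating
   ALU instructions (TARGET_INT_SIMD).  */
_Sat short _Fract
sat_add (_Sat short _Fract a, _Sat short _Fract b)
{
  return a + b;
}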
+ +(define_insn "add3" + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "add%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) + +(define_insn "add3" + [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") + (plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r") + (match_operand:ADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "usadd3" + [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") + (us_plus:UQADDSUB (match_operand:UQADDSUB 1 "s_register_operand" "r") + (match_operand:UQADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uqadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "ssadd3" + [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") + (ss_plus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r") + (match_operand:QADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "qadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "sub3" + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "sub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) + +(define_insn "sub3" + [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") + (minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r") + (match_operand:ADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "ssub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "ussub3" + [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") + (us_minus:UQADDSUB + (match_operand:UQADDSUB 1 "s_register_operand" "r") + (match_operand:UQADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uqsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "sssub3" + [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") + (ss_minus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r") + (match_operand:QADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "qsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +;; Fractional multiplies. + +; Note: none of these do any rounding. 
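The note above is worth spelling out: here is a plain-C model of the truncating s.15 case handled by the HQmode expander below (a 16x16->32 multiply followed by a 16-bit extract at bit 15), with the types and function name purely illustrative:

#include <stdint.h>

/* Truncating s.15 x s.15 fractional multiply: the s.30 product's low
   15 bits are simply discarded, i.e. no rounding of any kind.  */
static int16_t
hq_mul_model (int16_t a, int16_t b)
{
  int32_t prod = (int32_t) a * b;    /* s.30 product */
  return (int16_t) (prod >> 15);     /* keep bits [15..30]: s.15, truncated */
}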
+ +(define_expand "mulqq3" + [(set (match_operand:QQ 0 "s_register_operand" "") + (mult:QQ (match_operand:QQ 1 "s_register_operand" "") + (match_operand:QQ 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp1 = gen_reg_rtx (HImode); + rtx tmp2 = gen_reg_rtx (HImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_extendqihi2 (tmp1, gen_lowpart (QImode, operands[1]))); + emit_insn (gen_extendqihi2 (tmp2, gen_lowpart (QImode, operands[2]))); + emit_insn (gen_mulhisi3 (tmp3, tmp1, tmp2)); + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp3, GEN_INT (8), + GEN_INT (7))); + DONE; +}) + +(define_expand "mulhq3" + [(set (match_operand:HQ 0 "s_register_operand" "") + (mult:HQ (match_operand:HQ 1 "s_register_operand" "") + (match_operand:HQ 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + /* We're doing a s.15 * s.15 multiplication, getting an s.30 result. Extract + an s.15 value from that. This won't overflow/saturate for _Fract + values. */ + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, + GEN_INT (16), GEN_INT (15))); + DONE; +}) + +(define_expand "mulsq3" + [(set (match_operand:SQ 0 "s_register_operand" "") + (mult:SQ (match_operand:SQ 1 "s_register_operand" "") + (match_operand:SQ 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + /* s.31 * s.31 -> s.62 multiplication. */ + emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (31))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (1))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +;; Accumulator multiplies. 
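The SA ("short accum") patterns below work on an s16.15 layout, so a 32x32->64 signed multiply produces a value with 30 fraction bits that must be rescaled back to 15. A plain-C model of the recombination step performed by the first expander below (low half shifted right by 15, high half shifted left by 17, then ORed together); the function name is illustrative:

#include <stdint.h>

static int32_t
sa_mul_model (int32_t a, int32_t b)              /* a, b in s16.15 */
{
  int64_t prod = (int64_t) a * b;                /* s32.30 product */
  uint32_t lo = (uint32_t) prod;
  uint32_t hi = (uint32_t) ((uint64_t) prod >> 32);
  return (int32_t) ((lo >> 15) | (hi << 17));    /* back to 15 fraction bits */
}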
+ +(define_expand "mulsa3" + [(set (match_operand:SA 0 "s_register_operand" "") + (mult:SA (match_operand:SA 1 "s_register_operand" "") + (match_operand:SA 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (15))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (17))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +(define_expand "mulusa3" + [(set (match_operand:USA 0 "s_register_operand" "") + (mult:USA (match_operand:USA 1 "s_register_operand" "") + (match_operand:USA 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_umulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (16))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (16))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +;; The code sequence emitted by this insn pattern uses the Q flag, which GCC +;; doesn't generally know about, so we don't bother expanding to individual +;; instructions. It may be better to just use an out-of-line asm libcall for +;; this. + +(define_insn "ssmulsa3" + [(set (match_operand:SA 0 "s_register_operand" "=r") + (ss_mult:SA (match_operand:SA 1 "s_register_operand" "r") + (match_operand:SA 2 "s_register_operand" "r"))) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:SI 4 "=r")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && arm_arch6" +{ + /* s16.15 * s16.15 -> s32.30. */ + output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands); + + if (TARGET_ARM) + output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands); + else + { + output_asm_insn ("mov\\t%4, #0", operands); + output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands); + } + + /* We have: + 31 high word 0 31 low word 0 + + [ S i i .... i i i ] [ i f f f ... f f ] + | + v + [ S i ... i f ... f f ] + + Need 16 integral bits, so saturate at 15th bit of high word. */ + + output_asm_insn ("ssat\\t%R3, #15, %R3", operands); + output_asm_insn ("mrs\\t%4, APSR", operands); + output_asm_insn ("tst\\t%4, #1<<27", operands); + if (arm_restrict_it) + { + output_asm_insn ("mvn\\t%4, %R3, asr #32", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands); + } + output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands); + output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands); + return ""; +} + [(set_attr "conds" "clob") + (set_attr "type" "multiple") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) + (const_int 32)))]) + +;; Same goes for this. 
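Both the signed sequence above and the unsigned variant that follows amount to clamping the rescaled product into the destination range. A rough C-level model of the signed case, with the Q-flag and register juggling deliberately omitted and an illustrative function name:

#include <stdint.h>

static int32_t
ssat_sa_mul_model (int32_t a, int32_t b)   /* a, b in s16.15 */
{
  int64_t v = ((int64_t) a * b) >> 15;     /* s32.30 product rescaled to .15 */
  if (v > INT32_MAX)
    v = INT32_MAX;                         /* most positive s16.15 value */
  else if (v < INT32_MIN)
    v = INT32_MIN;                         /* most negative s16.15 value */
  return (int32_t) v;
}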
+ +(define_insn "usmulusa3" + [(set (match_operand:USA 0 "s_register_operand" "=r") + (us_mult:USA (match_operand:USA 1 "s_register_operand" "r") + (match_operand:USA 2 "s_register_operand" "r"))) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:SI 4 "=r")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && arm_arch6" +{ + /* 16.16 * 16.16 -> 32.32. */ + output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands); + + if (TARGET_ARM) + output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands); + else + { + output_asm_insn ("mov\\t%4, #0", operands); + output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands); + } + + /* We have: + 31 high word 0 31 low word 0 + + [ i i i .... i i i ] [ f f f f ... f f ] + | + v + [ i i ... i f ... f f ] + + Need 16 integral bits, so saturate at 16th bit of high word. */ + + output_asm_insn ("usat\\t%R3, #16, %R3", operands); + output_asm_insn ("mrs\\t%4, APSR", operands); + output_asm_insn ("tst\\t%4, #1<<27", operands); + if (arm_restrict_it) + { + output_asm_insn ("sbfx\\t%4, %R3, #15, #1", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands); + } + output_asm_insn ("lsr\\t%0, %Q3, #16", operands); + output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands); + return ""; +} + [(set_attr "conds" "clob") + (set_attr "type" "multiple") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) + (const_int 32)))]) + +(define_expand "mulha3" + [(set (match_operand:HA 0 "s_register_operand" "") + (mult:HA (match_operand:HA 1 "s_register_operand" "") + (match_operand:HA 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, GEN_INT (16), + GEN_INT (7))); + + DONE; +}) + +(define_expand "muluha3" + [(set (match_operand:UHA 0 "s_register_operand" "") + (mult:UHA (match_operand:UHA 1 "s_register_operand" "") + (match_operand:UHA 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" +{ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + /* 8.8 * 8.8 -> 16.16 multiply. 
*/ + emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); + emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); + emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2)); + emit_insn (gen_extzv (gen_lowpart (SImode, operands[0]), tmp3, + GEN_INT (16), GEN_INT (8))); + + DONE; +}) + +(define_expand "ssmulha3" + [(set (match_operand:HA 0 "s_register_operand" "") + (ss_mult:HA (match_operand:HA 1 "s_register_operand" "") + (match_operand:HA 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6" +{ + rtx tmp = gen_reg_rtx (SImode); + rtx rshift; + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + + rshift = gen_rtx_ASHIFTRT (SImode, tmp, GEN_INT (7)); + + emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (HImode, operands[0]), + gen_rtx_SS_TRUNCATE (HImode, rshift))); + + DONE; +}) + +(define_expand "usmuluha3" + [(set (match_operand:UHA 0 "s_register_operand" "") + (us_mult:UHA (match_operand:UHA 1 "s_register_operand" "") + (match_operand:UHA 2 "s_register_operand" "")))] + "TARGET_INT_SIMD" +{ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + rtx rshift_tmp = gen_reg_rtx (SImode); + + /* Note: there's no smul[bt][bt] equivalent for unsigned multiplies. Use a + normal 32x32->32-bit multiply instead. */ + emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); + emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); + + emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2)); + + /* The operand to "usat" is signed, so we cannot use the "..., asr #8" + form of that instruction since the multiplication result TMP3 may have the + top bit set, thus be negative and saturate to zero. Use a separate + logical right-shift instead. */ + emit_insn (gen_lshrsi3 (rshift_tmp, tmp3, GEN_INT (8))); + emit_insn (gen_arm_usatsihi (gen_lowpart (HImode, operands[0]), rshift_tmp)); + + DONE; +}) + +(define_insn "arm_ssatsihi_shift" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (ss_truncate:HI (match_operator:SI 1 "sat_shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "immediate_operand" "I")])))] + "TARGET_32BIT && arm_arch6" + "ssat%?\\t%0, #16, %2%S1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "1") + (set_attr "type" "alu_shift_imm")]) + +(define_insn "arm_usatsihi" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (us_truncate:HI (match_operand:SI 1 "s_register_operand")))] + "TARGET_INT_SIMD" + "usat%?\\t%0, #16, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_imm")] +) diff --git a/gcc-4.9/gcc/config/arm/arm-fpus.def b/gcc-4.9/gcc/config/arm/arm-fpus.def new file mode 100644 index 000000000..85d9693c1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-fpus.def @@ -0,0 +1,46 @@ +/* ARM FPU variants. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) + + The arguments are the fields of struct arm_fpu_desc. + + genopt.sh assumes no whitespace up to the first "," in each entry. */ + +ARM_FPU("vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false, false) +ARM_FPU("vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false, false) +ARM_FPU("vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true, false) +ARM_FPU("vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false, false) +ARM_FPU("vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true, false) +ARM_FPU("vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false, false) +ARM_FPU("vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true, false) +ARM_FPU("neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false, false) +ARM_FPU("neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true, false) +ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false) +ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false) +ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false) +ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false) +ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false) +ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false) +ARM_FPU("crypto-neon-fp-armv8", + ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, true) +/* Compatibility aliases. */ +ARM_FPU("vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false, false) diff --git a/gcc-4.9/gcc/config/arm/arm-generic.md b/gcc-4.9/gcc/config/arm/arm-generic.md new file mode 100644 index 000000000..b26c72c44 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-generic.md @@ -0,0 +1,152 @@ +;; Generic ARM Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +(define_automaton "arm") + +;; Write buffer +; +; Strictly, we should model a 4-deep write buffer for ARM7xx based chips +; +; The write buffer on some of the arm6 processors is hard to model exactly. +; There is room in the buffer for up to two addresses and up to eight words +; of memory, but the two needn't be split evenly. When writing the two +; addresses are fully pipelined. However, a read from memory that is not +; currently in the cache will block until the writes have completed. +; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so +; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous +; (they aren't allowed to be at present) then there is a startup cost of 1MCLK +; cycle to add as well. 
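Reading the reservations that follow against the note above: with FCLK and MCLK in the usual 2:1 ratio, each buffered word is modelled as two cycles on top of a small fixed cost, which is where the 5/7/9/11-cycle blockages for 1- to 4-word stores come from. A trivial restatement of that arithmetic, purely illustrative:

static int
wbuf_blockage_cycles (int nwords)
{
  /* Two cycles per buffered word plus a fixed three-cycle overhead gives
     the store_wbuf .. store4_wbuf blockage values of 5, 7, 9 and 11.  */
  return 2 * nwords + 3;
}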
+(define_cpu_unit "write_buf" "arm") + +;; Write blockage unit +; +; The write_blockage unit models (partially), the fact that reads will stall +; until the write buffer empties. +(define_cpu_unit "write_blockage" "arm") + +;; Core +; +(define_cpu_unit "core" "arm") + +(define_insn_reservation "store_wbuf" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store1"))) + "core+write_buf*3+write_blockage*5") + +(define_insn_reservation "store2_wbuf" 7 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store2"))) + "core+write_buf*4+write_blockage*7") + +(define_insn_reservation "store3_wbuf" 9 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store3"))) + "core+write_buf*5+write_blockage*9") + +(define_insn_reservation "store4_wbuf" 11 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store4"))) + "core+write_buf*6+write_blockage*11") + +(define_insn_reservation "store2" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store2"))) + "core*3") + +(define_insn_reservation "store3" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store3"))) + "core*4") + +(define_insn_reservation "store4" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store4"))) + "core*5") + +(define_insn_reservation "store_ldsched" 1 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (eq_attr "type" "store1"))) + "core") + +(define_insn_reservation "load_ldsched_xscale" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_ldsched" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_or_store" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "!yes") + (eq_attr "type" "load_byte,load1,load2,load3,load4,store1"))) + "core*2") + +(define_insn_reservation "mult" 16 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "no") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) + "core*16") + +(define_insn_reservation "mult_ldsched_strongarm" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "strongarm,strongarm110,strongarm1100,strongarm1110") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) + "core*2") + +(define_insn_reservation "mult_ldsched" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "!strongarm,strongarm110,strongarm1100,strongarm1110") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) + "core*4") + +(define_insn_reservation "multi_cycle" 32 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "core_cycles" "multi") + (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\ + store1,store2,store3,store4") + (not (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))))) + "core*32") + +(define_insn_reservation "single_cycle" 1 + (and (eq_attr "generic_sched" "yes") + (eq_attr "core_cycles" "single")) + "core") diff --git a/gcc-4.9/gcc/config/arm/arm-ldmstm.ml b/gcc-4.9/gcc/config/arm/arm-ldmstm.ml new file mode 100644 index 
000000000..2d8f9e267 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-ldmstm.ml @@ -0,0 +1,345 @@ +(* Auto-generate ARM ldm/stm patterns + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Run with: + ocaml arm-ldmstm.ml >/path/to/gcc/config/arm/ldmstm.md +*) + +type amode = IA | IB | DA | DB + +type optype = IN | OUT | INOUT + +let rec string_of_addrmode addrmode = + match addrmode with + IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db" + +let rec initial_offset addrmode nregs = + match addrmode with + IA -> 0 + | IB -> 4 + | DA -> -4 * nregs + 4 + | DB -> -4 * nregs + +let rec final_offset addrmode nregs = + match addrmode with + IA -> nregs * 4 + | IB -> nregs * 4 + | DA -> -4 * nregs + | DB -> -4 * nregs + +let constr thumb = + if thumb then "l" else "rk" + +let inout_constr op_type = + match op_type with + OUT -> "=&" + | INOUT -> "+&" + | IN -> "" + +let destreg nregs first op_type thumb = + if not first then + Printf.sprintf "(match_dup %d)" (nregs + 1) + else + Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")") + (nregs + 1) (inout_constr op_type) (constr thumb) + +let reg_predicate thumb = + if thumb then "low_register_operand" else "arm_hard_general_register_operand" + +let write_ldm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"%s\" \"\")\n" opnr (reg_predicate thumb); + Printf.printf "%s (mem:SI " indent; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf "\n%s (const_int %d))" indent offset end; + Printf.printf "))" + +let write_stm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (mem:SI "; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf " (const_int %d))" offset end; + Printf.printf ")\n%s (match_operand:SI %d \"%s\" \"\"))" indent opnr (reg_predicate thumb) + +let write_ldm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"memory_operand\" \"\"))" indent (nregs + opnr) + +let write_stm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set 
(match_operand:SI %d \"memory_operand\" \"\")\n" (nregs + opnr); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\"))" indent opnr + +let write_any_load optype nregs opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"%s\" \"\"))" indent (nregs * 2 + opnr) optype + +let write_const_store nregs opnr first = + let indent = " " in + Printf.printf "%s(set (match_operand:SI %d \"memory_operand\" \"\")\n" indent (nregs + opnr); + Printf.printf "%s (match_dup %d))" indent opnr + +let write_const_stm_peep_set nregs opnr first = + write_any_load "const_int_operand" nregs opnr first; + Printf.printf "\n"; + write_const_store nregs opnr false + + +let rec write_pat_sets func opnr offset first n_left = + func offset opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_pat_sets func (opnr + 1) (offset + 4) false (n_left - 1); + end else + Printf.printf "]" + end + +let rec write_peep_sets func opnr first n_left = + func opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_peep_sets func (opnr + 1) false (n_left - 1); + end + end + +let can_thumb addrmode update is_store = + match addrmode, update, is_store with + (* Thumb1 mode only supports IA with update. However, for LDMIA, + if the address register also appears in the list of loaded + registers, the loaded value is stored, hence the RTL pattern + to describe such an insn does not have an update. We check + in the match_parallel predicate that the condition described + above is met. *) + IA, _, false -> true + | IA, true, true -> true + | _ -> false + +exception InvalidAddrMode of string;; + +let target addrmode thumb = + match addrmode, thumb with + IA, true -> "TARGET_THUMB1" + | IA, false -> "TARGET_32BIT" + | DB, false -> "TARGET_32BIT" + | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") + +let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = + let astr = string_of_addrmode addrmode in + Printf.printf "(define_insn \"*%s%s%d_%s%s\"\n" + (if thumb then "thumb_" else "") name nregs astr + (if update then "_update" else ""); + Printf.printf " [(match_parallel 0 \"%s_multiple_operation\"\n" ls; + begin + if update then begin + Printf.printf " [(set %s\n (plus:SI %s" + (destreg nregs true INOUT thumb) (destreg nregs false IN thumb); + Printf.printf " (const_int %d)))\n" + (final_offset addrmode nregs) + end + end; + write_pat_sets + (write_set_fn thumb nregs) 1 + (initial_offset addrmode nregs) + (not update) nregs; + Printf.printf ")]\n \"%s && XVECLEN (operands[0], 0) == %d\"\n" + (target addrmode thumb) + (if update then nregs + 1 else nregs); + Printf.printf " \"%s%%(%s%%)\\t%%%d%s, {" + name astr (nregs + 1) (if update then "!" 
else ""); + for n = 1 to nregs; do + Printf.printf "%%%d%s" n (if n < nregs then ", " else "") + done; + Printf.printf "}\"\n"; + Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; + if not thumb then begin + Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; + end; + Printf.printf "])\n\n" + +let write_ldm_pattern addrmode nregs update = + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update false; + begin if can_thumb addrmode update false then + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update true; + end + +let write_stm_pattern addrmode nregs update = + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update false; + begin if can_thumb addrmode update true then + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update true; + end + +let write_ldm_commutative_peephole thumb = + let nregs = 2 in + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + let indent = " " in + if thumb then begin + Printf.printf "\n%s(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))]\n" indent (nregs * 2 + 3) + end else begin + Printf.printf "\n%s(parallel\n" indent; + Printf.printf "%s [(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3); + Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent + end; + Printf.printf " \"((((REGNO (operands[%d]) == REGNO (operands[0]))\n" (nregs * 2 + 2); + Printf.printf " && (REGNO (operands[%d]) == REGNO (operands[1])))\n" (nregs * 2 + 3); + Printf.printf " || ((REGNO (operands[%d]) == REGNO (operands[0]))\n" (nregs * 2 + 3); + Printf.printf " && (REGNO (operands[%d]) == REGNO (operands[1]))))\n" (nregs * 2 + 2); + Printf.printf " && (peep2_regno_dead_p (%d, REGNO (operands[0]))\n" (nregs + 1); + Printf.printf " || (REGNO (operands[0]) == REGNO (operands[%d])))\n" (nregs * 2); + Printf.printf " && (peep2_regno_dead_p (%d, REGNO (operands[1]))\n" (nregs + 1); + Printf.printf " || (REGNO (operands[1]) == REGNO (operands[%d]))))\"\n" (nregs * 2); + begin + if thumb then + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3) + else begin + Printf.printf " [(parallel\n"; + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3); + Printf.printf " (clobber (reg:CC CC_REGNUM))])]\n" + end + end; + Printf.printf "{\n if (!gen_ldm_seq (operands, %d, true))\n FAIL;\n" nregs; + Printf.printf "})\n\n" + +let write_ldm_peephole nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n 
FAIL;\n})\n\n" nregs + +let write_ldm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_ldm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_ldm_peep_set " " nregs) 1 true (nregs - 1); + Printf.printf "])]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_stm_peephole nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_stm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_stm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_stm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_stm_peep_set "" nregs) 1 true (nregs - 1); + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_const_stm_peephole_a nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_const_stm_peep_set nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_const_stm_peephole_b nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_any_load "const_int_operand" nregs) 0 true nregs; + Printf.printf "\n"; + write_peep_sets (write_const_store nregs) 0 false nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let patterns () = + let addrmodes = [ IA; IB; DA; DB ] in + let sizes = [ 4; 3; 2] in + List.iter + (fun n -> + List.iter + (fun addrmode -> + write_ldm_pattern addrmode n false; + write_ldm_pattern addrmode n true; + write_stm_pattern addrmode n false; + write_stm_pattern addrmode n true) + addrmodes; + write_ldm_peephole n; + write_ldm_peephole_b n; + write_const_stm_peephole_a n; + write_const_stm_peephole_b n; + write_stm_peephole n;) + sizes; + write_ldm_commutative_peephole false; + write_ldm_commutative_peephole true + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM ldm/stm instruction patterns. This file was automatically generated"; +" using arm-ldmstm.ml. Please do not edit manually."; +""; +" Copyright (C) 2010-2014 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . 
*/"; +""]; + patterns (); diff --git a/gcc-4.9/gcc/config/arm/arm-modes.def b/gcc-4.9/gcc/config/arm/arm-modes.def new file mode 100644 index 000000000..882aa55c5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-modes.def @@ -0,0 +1,84 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Extended precision floating point. + FIXME What format is this? */ +FLOAT_MODE (XF, 12, 0); + +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + +/* CCFPEmode should be used with floating inequalities, + CCFPmode should be used with floating equalities. + CC_NOOVmode should be used with SImode integer equalities. + CC_Zmode should be used if only the Z flag is set correctly + CC_Cmode should be used if only the C flag is set correctly, after an + addition. + CC_Nmode should be used if only the N (sign) flag is set correctly + CC_CZmode should be used if only the C and Z flags are correct + (used for DImode unsigned comparisons). + CC_NCVmode should be used if only the N, C, and V flags are correct + (used for DImode signed comparisons). + CCmode should be used otherwise. */ + +CC_MODE (CC_NOOV); +CC_MODE (CC_Z); +CC_MODE (CC_CZ); +CC_MODE (CC_NCV); +CC_MODE (CC_SWP); +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_DNE); +CC_MODE (CC_DEQ); +CC_MODE (CC_DLE); +CC_MODE (CC_DLT); +CC_MODE (CC_DGE); +CC_MODE (CC_DGT); +CC_MODE (CC_DLEU); +CC_MODE (CC_DLTU); +CC_MODE (CC_DGEU); +CC_MODE (CC_DGTU); +CC_MODE (CC_C); +CC_MODE (CC_N); + +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ + +/* Fraction and accumulator vector modes. */ +VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ +VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ +VECTOR_MODES (ACCUM, 4); /* V2HA */ +VECTOR_MODES (UACCUM, 4); /* V2UHA */ + +/* Opaque integer modes for 3, 4, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (OI, 32); +INT_MODE (CI, 48); +INT_MODE (XI, 64); diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h new file mode 100644 index 000000000..a8393975a --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-opts.h @@ -0,0 +1,75 @@ +/* Definitions for option handling for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ARM_OPTS_H +#define ARM_OPTS_H + +/* The various ARM cores. */ +enum processor_type +{ +#undef ARM_CORE +#define ARM_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + INTERNAL_IDENT, +#include "arm-cores.def" +#undef ARM_CORE + /* Used to indicate that no processor has been specified. */ + arm_none +}; + +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. + */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +/* Which ABI to use. */ +enum arm_abi_type +{ + ARM_ABI_APCS, + ARM_ABI_ATPCS, + ARM_ABI_AAPCS, + ARM_ABI_IWMMXT, + ARM_ABI_AAPCS_LINUX +}; + +enum float_abi_type +{ + ARM_FLOAT_ABI_SOFT, + ARM_FLOAT_ABI_SOFTFP, + ARM_FLOAT_ABI_HARD +}; + +/* Which thread pointer access sequence to use. */ +enum arm_tp_type { + TP_AUTO, + TP_SOFT, + TP_CP15 +}; + +/* Which TLS scheme to use. */ +enum arm_tls_type { + TLS_GNU, + TLS_GNU2 +}; +#endif diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h b/gcc-4.9/gcc/config/arm/arm-protos.h new file mode 100644 index 000000000..13874ee6e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-protos.h @@ -0,0 +1,297 @@ +/* Prototypes for exported functions defined in arm.c and pe.c + Copyright (C) 1999-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_ARM_PROTOS_H +#define GCC_ARM_PROTOS_H + +extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); +extern int use_return_insn (int, rtx); +extern bool use_simple_return_p (void); +extern enum reg_class arm_regno_class (int); +extern void arm_load_pic_register (unsigned long); +extern int arm_volatile_func (void); +extern void arm_expand_prologue (void); +extern void arm_expand_epilogue (bool); +extern void thumb2_expand_return (bool); +extern const char *arm_strip_name_encoding (const char *); +extern void arm_asm_output_labelref (FILE *, const char *); +extern void thumb2_asm_output_opcode (FILE *); +extern unsigned long arm_current_func_type (void); +extern HOST_WIDE_INT arm_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern HOST_WIDE_INT thumb_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern unsigned int arm_dbx_register_number (unsigned int); +extern void arm_output_fn_unwind (FILE *, bool); + + +#ifdef RTX_CODE +extern bool arm_vector_mode_supported_p (enum machine_mode); +extern bool arm_small_register_classes_for_mode_p (enum machine_mode); +extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); +extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode); +extern int const_ok_for_arm (HOST_WIDE_INT); +extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); +extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); +extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); +extern int legitimate_pic_operand_p (rtx); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); +extern rtx legitimize_tls_address (rtx, rtx); +extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); +extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); +extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); +extern bool ldm_stm_operation_p (rtx, bool, enum machine_mode mode, + bool, bool); +extern int arm_const_double_rtx (rtx); +extern int vfp3_const_double_rtx (rtx); +extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); +extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); +extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, + int *, bool); +extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); +extern char *neon_output_shift_immediate (const char *, char, rtx *, + enum machine_mode, int, bool); +extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); +extern rtx neon_make_constant (rtx); +extern tree arm_builtin_vectorized_function (tree, tree, tree); +extern void neon_expand_vector_init (rtx, rtx); +extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern HOST_WIDE_INT neon_element_bits (enum machine_mode); +extern void neon_reinterpret (rtx, rtx); +extern void neon_emit_pair_result_insn (enum machine_mode, + rtx (*) (rtx, rtx, rtx, rtx), + rtx, rtx, rtx); +extern void neon_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); +extern void neon_split_vcombine (rtx op[3]); +extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); 
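The gen_ldm_seq, gen_stm_seq and gen_const_stm_seq prototypes immediately below are the helpers that the peepholes generated by arm-ldmstm.ml expand into (each generated define_peephole2 body calls one of them and FAILs otherwise). The address arithmetic those patterns encode is compact enough to show directly. A minimal C sketch, mirroring initial_offset and final_offset from arm-ldmstm.ml under the same 4-bytes-per-register assumption; the type and function names here are illustrative and are not part of GCC:

#include <stdio.h>

/* Mirror of the four ARM multiple-load/store addressing modes used by
   arm-ldmstm.ml: Increment/Decrement After/Before.  */
enum amode { IA, IB, DA, DB };

/* Offset of the first transferred word relative to the base register,
   as in initial_offset in arm-ldmstm.ml.  */
static int initial_offset (enum amode m, int nregs)
{
  switch (m)
    {
    case IA: return 0;
    case IB: return 4;
    case DA: return -4 * nregs + 4;
    case DB: return -4 * nregs;
    }
  return 0;
}

/* Amount written back to the base register when the "_update" form is
   used, as in final_offset in arm-ldmstm.ml.  */
static int final_offset (enum amode m, int nregs)
{
  switch (m)
    {
    case IA:
    case IB: return 4 * nregs;
    case DA:
    case DB: return -4 * nregs;
    }
  return 0;
}

int main (void)
{
  /* Example: a DB (decrement-before) transfer of 3 registers touches
     base-12, base-8 and base-4, and the update form adjusts the base
     register by -12.  Successive sets in the generated pattern step the
     offset by +4, matching write_pat_sets above.  */
  printf ("DB, 3 regs: first word at %d, writeback %d\n",
          initial_offset (DB, 3), final_offset (DB, 3));
  return 0;
}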
+extern bool arm_tls_referenced_p (rtx); + +extern int arm_coproc_mem_operand (rtx, bool); +extern int neon_vector_mem_operand (rtx, int, bool); +extern int neon_struct_mem_operand (rtx); + +extern int tls_mentioned_p (rtx); +extern int symbol_mentioned_p (rtx); +extern int label_mentioned_p (rtx); +extern RTX_CODE minmax_code (rtx); +extern bool arm_sat_operator_match (rtx, rtx, int *, bool *); +extern int adjacent_mem_locations (rtx, rtx); +extern bool gen_ldm_seq (rtx *, int, bool); +extern bool gen_stm_seq (rtx *, int); +extern bool gen_const_stm_seq (rtx *, int); +extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); +extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); +extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); +extern int arm_gen_movmemqi (rtx *); +extern bool gen_movmem_ldrd_strd (rtx *); +extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); +extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, + HOST_WIDE_INT); +extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx); +extern rtx arm_gen_return_addr_mask (void); +extern void arm_reload_in_hi (rtx *); +extern void arm_reload_out_hi (rtx *); +extern int arm_max_const_double_inline_cost (void); +extern int arm_const_double_inline_cost (rtx); +extern bool arm_const_double_by_parts (rtx); +extern bool arm_const_double_by_immediates (rtx); +extern const char *fp_immediate_constant (rtx); +extern void arm_emit_call_insn (rtx, rtx); +extern const char *output_call (rtx *); +extern const char *output_call_mem (rtx *); +void arm_emit_movpair (rtx, rtx); +extern const char *output_mov_long_double_arm_from_arm (rtx *); +extern const char *output_move_double (rtx *, bool, int *count); +extern const char *output_move_quad (rtx *); +extern int arm_count_output_move_double_insns (rtx *); +extern const char *output_move_vfp (rtx *operands); +extern const char *output_move_neon (rtx *operands); +extern int arm_attr_length_move_neon (rtx); +extern int arm_address_offset_is_imm (rtx); +extern const char *output_add_immediate (rtx *); +extern const char *arithmetic_instr (rtx, int); +extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int); +extern const char *output_return_instruction (rtx, bool, bool, bool); +extern void arm_poke_function_name (FILE *, const char *); +extern void arm_final_prescan_insn (rtx); +extern int arm_debugger_arg_offset (int, rtx); +extern bool arm_is_long_call_p (tree); +extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); +extern const char * arm_output_load_gr (rtx *); +extern const char *vfp_output_fstmd (rtx *); +extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool); +extern void arm_set_return_address (rtx, rtx); +extern int arm_eliminable_register (rtx); +extern const char *arm_output_shift(rtx *, int); +extern const char *arm_output_iwmmxt_shift_immediate (const char *, rtx *, bool); +extern const char *arm_output_iwmmxt_tinsr (rtx *); +extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern int arm_attr_length_push_multi(rtx, rtx); +extern void arm_expand_compare_and_swap (rtx op[]); +extern void arm_split_compare_and_swap (rtx op[]); +extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); +extern rtx arm_load_tp (rtx); + +#if defined TREE_CODE +extern void arm_init_cumulative_args 
(CUMULATIVE_ARGS *, tree, rtx, tree); +extern bool arm_pad_arg_upward (enum machine_mode, const_tree); +extern bool arm_pad_reg_upward (enum machine_mode, tree, int); +#endif +extern int arm_apply_result_size (void); + +#endif /* RTX_CODE */ + +/* Thumb functions. */ +extern void arm_init_expanders (void); +extern const char *thumb1_unexpanded_epilogue (void); +extern void thumb1_expand_prologue (void); +extern void thumb1_expand_epilogue (void); +extern const char *thumb1_output_interwork (void); +#ifdef TREE_CODE +extern int is_called_in_ARM_mode (tree); +#endif +extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); +#ifdef RTX_CODE +extern enum arm_cond_code maybe_get_arm_condition_code (rtx); +extern void thumb1_final_prescan_insn (rtx); +extern void thumb2_final_prescan_insn (rtx); +extern const char *thumb_load_double_from_address (rtx *); +extern const char *thumb_output_move_mem_multiple (int, rtx *); +extern const char *thumb_call_via_reg (rtx); +extern void thumb_expand_movmemqi (rtx *); +extern rtx arm_return_addr (int, rtx); +extern void thumb_reload_out_hi (rtx *); +extern void thumb_reload_in_hi (rtx *); +extern void thumb_set_return_address (rtx, rtx); +extern const char *thumb1_output_casesi (rtx *); +extern const char *thumb2_output_casesi (rtx *); +#endif + +/* Defined in pe.c. */ +extern int arm_dllexport_name_p (const char *); +extern int arm_dllimport_name_p (const char *); + +#ifdef TREE_CODE +extern void arm_pe_unique_section (tree, int); +extern void arm_pe_encode_section_info (tree, rtx, int); +extern int arm_dllexport_p (tree); +extern int arm_dllimport_p (tree); +extern void arm_mark_dllexport (tree); +extern void arm_mark_dllimport (tree); +#endif + +extern void arm_pr_long_calls (struct cpp_reader *); +extern void arm_pr_no_long_calls (struct cpp_reader *); +extern void arm_pr_long_calls_off (struct cpp_reader *); + +extern void arm_lang_object_attributes_init(void); + +extern const char *arm_mangle_type (const_tree); + +extern void arm_order_regs_for_local_alloc (void); + +extern int arm_max_conditional_execute (); + +/* Vectorizer cost model implementation. */ +struct cpu_vec_costs { + const int scalar_stmt_cost; /* Cost of any scalar operation, excluding + load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, excluding + load, store, vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector load. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer + cost model. */ + const int cond_not_taken_branch_cost;/* Cost of not taken branch for + vectorizer cost model. */ +}; + +#ifdef RTX_CODE +/* This needs to be here because we need RTX_CODE and similar. */ + +struct cpu_cost_table; + +struct tune_params +{ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + const struct cpu_cost_table *insn_extra_cost; + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; + /* Maximum number of instructions to conditionalise. 
*/ + int max_insns_skipped; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; + bool prefer_constant_pool; + int (*branch_cost) (bool, bool); + /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ + bool prefer_ldrd_strd; + /* The preference for non short cirtcuit operation when optimizing for + performance. The first element covers Thumb state and the second one + is for ARM state. */ + bool logical_op_non_short_circuit[2]; + /* Vectorizer costs. */ + const struct cpu_vec_costs* vec_costs; + /* Prefer Neon for 64-bit bitops. */ + bool prefer_neon_for_64bits; +}; + +extern const struct tune_params *current_tune; +extern int vfp3_const_double_for_fract_bits (rtx); +/* return power of two from operand, otherwise 0. */ +extern int vfp3_const_double_for_bits (rtx); + +extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, + rtx); +extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +#endif /* RTX_CODE */ + +extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); + +extern bool arm_autoinc_modes_ok_p (enum machine_mode, enum arm_auto_incmodes); + +extern void arm_emit_eabi_attribute (const char *, int, int); + +/* Defined in gcc/common/config/arm-common.c. */ +extern const char *arm_rewrite_selected_cpu (const char *name); + +#endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/arm-tables.opt b/gcc-4.9/gcc/config/arm/arm-tables.opt new file mode 100644 index 000000000..bc046a0de --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-tables.opt @@ -0,0 +1,439 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from arm-cores.def, arm-arches.def +; and arm-fpus.def. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
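Before the generated option tables that follow, one note on the cost structures declared above in arm-protos.h: cpu_vec_costs gives per-CPU prices for scalar and vector statements, loads and stores, and tune_params points at one such table per tuning target. A small C sketch of how a table of that shape can feed a scalar-versus-vector decision; the struct mirrors a subset of the fields above, but the numbers and the compare_costs helper are invented for illustration and are not taken from any real ARM tuning table:

#include <stdio.h>

/* Same shape as part of cpu_vec_costs in arm-protos.h (only the fields
   used below).  */
struct cpu_vec_costs
{
  int scalar_stmt_cost;
  int scalar_load_cost;
  int scalar_store_cost;
  int vec_stmt_cost;
  int vec_align_load_cost;
  int vec_store_cost;
};

/* Hypothetical unit costs, for illustration only.  */
static const struct cpu_vec_costs demo_costs = { 1, 1, 1, 1, 1, 1 };

/* Crude comparison: a loop doing one load, one arithmetic statement and
   one store per element, vectorized with vectorization factor VF.  */
static void compare_costs (const struct cpu_vec_costs *c, int n, int vf)
{
  int scalar = n * (c->scalar_load_cost + c->scalar_stmt_cost
                    + c->scalar_store_cost);
  int vector = (n / vf) * (c->vec_align_load_cost + c->vec_stmt_cost
                           + c->vec_store_cost);
  printf ("n=%d vf=%d: scalar=%d vector=%d -> %s\n",
          n, vf, scalar, vector,
          vector < scalar ? "vectorize" : "keep scalar");
}

int main (void)
{
  compare_costs (&demo_costs, 64, 4);  /* e.g. four 32-bit lanes */
  return 0;
}

With equal unit costs the vector form wins whenever the vectorization factor exceeds one, which is why the real structure also carries separate unaligned-access and taken/not-taken branch cost fields to skew the decision.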
+ +Enum +Name(processor_type) Type(enum processor_type) +Known ARM CPUs (for use with the -mcpu= and -mtune= options): + +EnumValue +Enum(processor_type) String(arm2) Value(arm2) + +EnumValue +Enum(processor_type) String(arm250) Value(arm250) + +EnumValue +Enum(processor_type) String(arm3) Value(arm3) + +EnumValue +Enum(processor_type) String(arm6) Value(arm6) + +EnumValue +Enum(processor_type) String(arm60) Value(arm60) + +EnumValue +Enum(processor_type) String(arm600) Value(arm600) + +EnumValue +Enum(processor_type) String(arm610) Value(arm610) + +EnumValue +Enum(processor_type) String(arm620) Value(arm620) + +EnumValue +Enum(processor_type) String(arm7) Value(arm7) + +EnumValue +Enum(processor_type) String(arm7d) Value(arm7d) + +EnumValue +Enum(processor_type) String(arm7di) Value(arm7di) + +EnumValue +Enum(processor_type) String(arm70) Value(arm70) + +EnumValue +Enum(processor_type) String(arm700) Value(arm700) + +EnumValue +Enum(processor_type) String(arm700i) Value(arm700i) + +EnumValue +Enum(processor_type) String(arm710) Value(arm710) + +EnumValue +Enum(processor_type) String(arm720) Value(arm720) + +EnumValue +Enum(processor_type) String(arm710c) Value(arm710c) + +EnumValue +Enum(processor_type) String(arm7100) Value(arm7100) + +EnumValue +Enum(processor_type) String(arm7500) Value(arm7500) + +EnumValue +Enum(processor_type) String(arm7500fe) Value(arm7500fe) + +EnumValue +Enum(processor_type) String(arm7m) Value(arm7m) + +EnumValue +Enum(processor_type) String(arm7dm) Value(arm7dm) + +EnumValue +Enum(processor_type) String(arm7dmi) Value(arm7dmi) + +EnumValue +Enum(processor_type) String(arm8) Value(arm8) + +EnumValue +Enum(processor_type) String(arm810) Value(arm810) + +EnumValue +Enum(processor_type) String(strongarm) Value(strongarm) + +EnumValue +Enum(processor_type) String(strongarm110) Value(strongarm110) + +EnumValue +Enum(processor_type) String(strongarm1100) Value(strongarm1100) + +EnumValue +Enum(processor_type) String(strongarm1110) Value(strongarm1110) + +EnumValue +Enum(processor_type) String(fa526) Value(fa526) + +EnumValue +Enum(processor_type) String(fa626) Value(fa626) + +EnumValue +Enum(processor_type) String(arm7tdmi) Value(arm7tdmi) + +EnumValue +Enum(processor_type) String(arm7tdmi-s) Value(arm7tdmis) + +EnumValue +Enum(processor_type) String(arm710t) Value(arm710t) + +EnumValue +Enum(processor_type) String(arm720t) Value(arm720t) + +EnumValue +Enum(processor_type) String(arm740t) Value(arm740t) + +EnumValue +Enum(processor_type) String(arm9) Value(arm9) + +EnumValue +Enum(processor_type) String(arm9tdmi) Value(arm9tdmi) + +EnumValue +Enum(processor_type) String(arm920) Value(arm920) + +EnumValue +Enum(processor_type) String(arm920t) Value(arm920t) + +EnumValue +Enum(processor_type) String(arm922t) Value(arm922t) + +EnumValue +Enum(processor_type) String(arm940t) Value(arm940t) + +EnumValue +Enum(processor_type) String(ep9312) Value(ep9312) + +EnumValue +Enum(processor_type) String(arm10tdmi) Value(arm10tdmi) + +EnumValue +Enum(processor_type) String(arm1020t) Value(arm1020t) + +EnumValue +Enum(processor_type) String(arm9e) Value(arm9e) + +EnumValue +Enum(processor_type) String(arm946e-s) Value(arm946es) + +EnumValue +Enum(processor_type) String(arm966e-s) Value(arm966es) + +EnumValue +Enum(processor_type) String(arm968e-s) Value(arm968es) + +EnumValue +Enum(processor_type) String(arm10e) Value(arm10e) + +EnumValue +Enum(processor_type) String(arm1020e) Value(arm1020e) + +EnumValue +Enum(processor_type) String(arm1022e) Value(arm1022e) + +EnumValue 
+Enum(processor_type) String(xscale) Value(xscale) + +EnumValue +Enum(processor_type) String(iwmmxt) Value(iwmmxt) + +EnumValue +Enum(processor_type) String(iwmmxt2) Value(iwmmxt2) + +EnumValue +Enum(processor_type) String(fa606te) Value(fa606te) + +EnumValue +Enum(processor_type) String(fa626te) Value(fa626te) + +EnumValue +Enum(processor_type) String(fmp626) Value(fmp626) + +EnumValue +Enum(processor_type) String(fa726te) Value(fa726te) + +EnumValue +Enum(processor_type) String(arm926ej-s) Value(arm926ejs) + +EnumValue +Enum(processor_type) String(arm1026ej-s) Value(arm1026ejs) + +EnumValue +Enum(processor_type) String(arm1136j-s) Value(arm1136js) + +EnumValue +Enum(processor_type) String(arm1136jf-s) Value(arm1136jfs) + +EnumValue +Enum(processor_type) String(arm1176jz-s) Value(arm1176jzs) + +EnumValue +Enum(processor_type) String(arm1176jzf-s) Value(arm1176jzfs) + +EnumValue +Enum(processor_type) String(mpcorenovfp) Value(mpcorenovfp) + +EnumValue +Enum(processor_type) String(mpcore) Value(mpcore) + +EnumValue +Enum(processor_type) String(arm1156t2-s) Value(arm1156t2s) + +EnumValue +Enum(processor_type) String(arm1156t2f-s) Value(arm1156t2fs) + +EnumValue +Enum(processor_type) String(cortex-m1) Value(cortexm1) + +EnumValue +Enum(processor_type) String(cortex-m0) Value(cortexm0) + +EnumValue +Enum(processor_type) String(cortex-m0plus) Value(cortexm0plus) + +EnumValue +Enum(processor_type) String(generic-armv7-a) Value(genericv7a) + +EnumValue +Enum(processor_type) String(cortex-a5) Value(cortexa5) + +EnumValue +Enum(processor_type) String(cortex-a7) Value(cortexa7) + +EnumValue +Enum(processor_type) String(cortex-a8) Value(cortexa8) + +EnumValue +Enum(processor_type) String(cortex-a9) Value(cortexa9) + +EnumValue +Enum(processor_type) String(cortex-a12) Value(cortexa12) + +EnumValue +Enum(processor_type) String(cortex-a15) Value(cortexa15) + +EnumValue +Enum(processor_type) String(cortex-r4) Value(cortexr4) + +EnumValue +Enum(processor_type) String(cortex-r4f) Value(cortexr4f) + +EnumValue +Enum(processor_type) String(cortex-r5) Value(cortexr5) + +EnumValue +Enum(processor_type) String(cortex-r7) Value(cortexr7) + +EnumValue +Enum(processor_type) String(cortex-m4) Value(cortexm4) + +EnumValue +Enum(processor_type) String(cortex-m3) Value(cortexm3) + +EnumValue +Enum(processor_type) String(marvell-pj4) Value(marvell_pj4) + +EnumValue +Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7) + +EnumValue +Enum(processor_type) String(cortex-a53) Value(cortexa53) + +EnumValue +Enum(processor_type) String(cortex-a57) Value(cortexa57) + +EnumValue +Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) + +Enum +Name(arm_arch) Type(int) +Known ARM architectures (for use with the -march= option): + +EnumValue +Enum(arm_arch) String(armv2) Value(0) + +EnumValue +Enum(arm_arch) String(armv2a) Value(1) + +EnumValue +Enum(arm_arch) String(armv3) Value(2) + +EnumValue +Enum(arm_arch) String(armv3m) Value(3) + +EnumValue +Enum(arm_arch) String(armv4) Value(4) + +EnumValue +Enum(arm_arch) String(armv4t) Value(5) + +EnumValue +Enum(arm_arch) String(armv5) Value(6) + +EnumValue +Enum(arm_arch) String(armv5t) Value(7) + +EnumValue +Enum(arm_arch) String(armv5e) Value(8) + +EnumValue +Enum(arm_arch) String(armv5te) Value(9) + +EnumValue +Enum(arm_arch) String(armv6) Value(10) + +EnumValue +Enum(arm_arch) String(armv6j) Value(11) + +EnumValue +Enum(arm_arch) String(armv6k) Value(12) + +EnumValue +Enum(arm_arch) String(armv6z) Value(13) + +EnumValue +Enum(arm_arch) 
String(armv6zk) Value(14) + +EnumValue +Enum(arm_arch) String(armv6t2) Value(15) + +EnumValue +Enum(arm_arch) String(armv6-m) Value(16) + +EnumValue +Enum(arm_arch) String(armv6s-m) Value(17) + +EnumValue +Enum(arm_arch) String(armv7) Value(18) + +EnumValue +Enum(arm_arch) String(armv7-a) Value(19) + +EnumValue +Enum(arm_arch) String(armv7ve) Value(20) + +EnumValue +Enum(arm_arch) String(armv7-r) Value(21) + +EnumValue +Enum(arm_arch) String(armv7-m) Value(22) + +EnumValue +Enum(arm_arch) String(armv7e-m) Value(23) + +EnumValue +Enum(arm_arch) String(armv8-a) Value(24) + +EnumValue +Enum(arm_arch) String(armv8-a+crc) Value(25) + +EnumValue +Enum(arm_arch) String(iwmmxt) Value(26) + +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(27) + +Enum +Name(arm_fpu) Type(int) +Known ARM FPUs (for use with the -mfpu= option): + +EnumValue +Enum(arm_fpu) String(vfp) Value(0) + +EnumValue +Enum(arm_fpu) String(vfpv3) Value(1) + +EnumValue +Enum(arm_fpu) String(vfpv3-fp16) Value(2) + +EnumValue +Enum(arm_fpu) String(vfpv3-d16) Value(3) + +EnumValue +Enum(arm_fpu) String(vfpv3-d16-fp16) Value(4) + +EnumValue +Enum(arm_fpu) String(vfpv3xd) Value(5) + +EnumValue +Enum(arm_fpu) String(vfpv3xd-fp16) Value(6) + +EnumValue +Enum(arm_fpu) String(neon) Value(7) + +EnumValue +Enum(arm_fpu) String(neon-fp16) Value(8) + +EnumValue +Enum(arm_fpu) String(vfpv4) Value(9) + +EnumValue +Enum(arm_fpu) String(vfpv4-d16) Value(10) + +EnumValue +Enum(arm_fpu) String(fpv4-sp-d16) Value(11) + +EnumValue +Enum(arm_fpu) String(neon-vfpv4) Value(12) + +EnumValue +Enum(arm_fpu) String(fp-armv8) Value(13) + +EnumValue +Enum(arm_fpu) String(neon-fp-armv8) Value(14) + +EnumValue +Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15) + +EnumValue +Enum(arm_fpu) String(vfp3) Value(16) + diff --git a/gcc-4.9/gcc/config/arm/arm-tune.md b/gcc-4.9/gcc/config/arm/arm-tune.md new file mode 100644 index 000000000..954cab8ef --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-tune.md @@ -0,0 +1,34 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from arm-cores.def +(define_attr "tune" + "arm2,arm250,arm3, + arm6,arm60,arm600, + arm610,arm620,arm7, + arm7d,arm7di,arm70, + arm700,arm700i,arm710, + arm720,arm710c,arm7100, + arm7500,arm7500fe,arm7m, + arm7dm,arm7dmi,arm8, + arm810,strongarm,strongarm110, + strongarm1100,strongarm1110,fa526, + fa626,arm7tdmi,arm7tdmis, + arm710t,arm720t,arm740t, + arm9,arm9tdmi,arm920, + arm920t,arm922t,arm940t, + ep9312,arm10tdmi,arm1020t, + arm9e,arm946es,arm966es, + arm968es,arm10e,arm1020e, + arm1022e,xscale,iwmmxt, + iwmmxt2,fa606te,fa626te, + fmp626,fa726te,arm926ejs, + arm1026ejs,arm1136js,arm1136jfs, + arm1176jzs,arm1176jzfs,mpcorenovfp, + mpcore,arm1156t2s,arm1156t2fs, + cortexm1,cortexm0,cortexm0plus, + genericv7a,cortexa5,cortexa7, + cortexa8,cortexa9,cortexa12, + cortexa15,cortexr4,cortexr4f, + cortexr5,cortexr7,cortexm4, + cortexm3,marvell_pj4,cortexa15cortexa7, + cortexa53,cortexa57,cortexa57cortexa53" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c new file mode 100644 index 000000000..0240cc70e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -0,0 +1,31119 @@ +/* Output routines for GCC for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com). + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-table.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "obstack.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "reload.h" +#include "function.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "cgraph.h" +#include "ggc.h" +#include "except.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "debug.h" +#include "langhooks.h" +#include "df.h" +#include "intl.h" +#include "libfuncs.h" +#include "params.h" +#include "opts.h" +#include "dumpfile.h" + +/* Forward definitions of types. */ +typedef struct minipool_node Mnode; +typedef struct minipool_fixup Mfix; + +void (*arm_lang_output_object_attributes_hook)(void); + +struct four_ints +{ + int i[4]; +}; + +/* Forward function declarations. */ +static bool arm_lra_p (void); +static bool arm_needs_doubleword_align (enum machine_mode, const_tree); +static int arm_compute_static_chain_stack_bytes (void); +static arm_stack_offsets *arm_get_frame_offsets (void); +static void arm_add_gc_roots (void); +static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int, int); +static unsigned bit_count (unsigned long); +static int arm_address_register_rtx_p (rtx, int); +static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int); +static int thumb2_legitimate_index_p (enum machine_mode, rtx, int); +static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int); +static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); +static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); +static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); +inline static int thumb1_index_register_rtx_p (rtx, int); +static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); +static int thumb_far_jump_used_p (void); +static bool thumb_force_lr_save (void); +static unsigned arm_size_return_regs (void); +static bool arm_assemble_integer (rtx, unsigned int, int); +static void arm_print_operand (FILE *, rtx, int); +static void arm_print_operand_address (FILE *, rtx); +static bool arm_print_operand_punct_valid_p (unsigned char code); +static const char *fp_const_from_val (REAL_VALUE_TYPE *); +static arm_cc get_arm_condition_code (rtx); +static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); +static const char *output_multi_immediate (rtx *, const char *, const char *, + int, HOST_WIDE_INT); +static const char *shift_op (rtx, HOST_WIDE_INT *); +static struct machine_function *arm_init_machine_status (void); +static void thumb_exit (FILE *, int); +static HOST_WIDE_INT get_jump_table_size (rtx); +static Mnode *move_minipool_fix_forward_ref 
(Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_forward_ref (Mfix *); +static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_backward_ref (Mfix *); +static void assign_minipool_offsets (Mfix *); +static void arm_print_value (FILE *, rtx); +static void dump_minipool (rtx); +static int arm_barrier_cost (rtx); +static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); +static void push_minipool_barrier (rtx, HOST_WIDE_INT); +static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode, + rtx); +static void arm_reorg (void); +static void note_invalid_constants (rtx, HOST_WIDE_INT, int); +static unsigned long arm_compute_save_reg0_reg12_mask (void); +static unsigned long arm_compute_save_reg_mask (void); +static unsigned long arm_isr_value (tree); +static unsigned long arm_compute_func_type (void); +static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); +#endif +static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); +static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); +static int arm_comp_type_attributes (const_tree, const_tree); +static void arm_set_default_type_attributes (tree); +static int arm_adjust_cost (rtx, rtx, rtx, int); +static int arm_sched_reorder (FILE *, int, rtx *, int *, int); +static int optimal_immediate_sequence (enum rtx_code code, + unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence); +static int optimal_immediate_sequence_1 (enum rtx_code code, + unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence, + int i); +static int arm_get_strip_length (int); +static bool arm_function_ok_for_sibcall (tree, tree); +static enum machine_mode arm_promote_function_mode (const_tree, + enum machine_mode, int *, + const_tree, int); +static bool arm_return_in_memory (const_tree, const_tree); +static rtx arm_function_value (const_tree, const_tree, bool); +static rtx arm_libcall_value_1 (enum machine_mode); +static rtx arm_libcall_value (enum machine_mode, const_rtx); +static bool arm_function_value_regno_p (const unsigned int); +static void arm_internal_label (FILE *, const char *, unsigned long); +static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static bool arm_have_conditional_execution (void); +static bool arm_cannot_force_const_mem (enum machine_mode, rtx); +static bool arm_legitimate_constant_p (enum machine_mode, rtx); +static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); +static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); +static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_rtx_costs (rtx, int, int, int, int *, bool); +static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool); +static void arm_init_builtins (void); +static void 
arm_init_iwmmxt_builtins (void); +static rtx safe_vector_operand (rtx, enum machine_mode); +static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); +static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); +static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static tree arm_builtin_decl (unsigned, bool); +static void emit_constant_insn (rtx cond, rtx pattern); +static rtx emit_set_insn (rtx, rtx); +static rtx emit_multi_reg_push (unsigned long, unsigned long); +static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx arm_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static void arm_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree); +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, + const_tree); +static rtx aapcs_libcall_value (enum machine_mode); +static int aapcs_select_return_coproc (const_tree, const_tree); + +#ifdef OBJECT_FORMAT_ELF +static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +#endif +#ifndef ARM_PE +static void arm_encode_section_info (tree, rtx, int); +#endif + +static void arm_file_end (void); +static void arm_file_start (void); + +static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree, int *, int); +static bool arm_pass_by_reference (cumulative_args_t, + enum machine_mode, const_tree, bool); +static bool arm_promote_prototypes (const_tree); +static bool arm_default_short_enums (void); +static bool arm_align_anon_bitfield (void); +static bool arm_return_in_msb (const_tree); +static bool arm_must_pass_in_stack (enum machine_mode, const_tree); +static bool arm_return_in_memory (const_tree, const_tree); +#if ARM_UNWIND_INFO +static void arm_unwind_emit (FILE *, rtx); +static bool arm_output_ttype (rtx); +static void arm_asm_emit_except_personality (rtx); +static void arm_asm_init_sections (void); +#endif +static rtx arm_dwarf_register_span (rtx); + +static tree arm_cxx_guard_type (void); +static bool arm_cxx_guard_mask_bit (void); +static tree arm_get_cookie_size (tree); +static bool arm_cookie_has_size (void); +static bool arm_cxx_cdtor_returns_this (void); +static bool arm_cxx_key_method_may_be_inline (void); +static void arm_cxx_determine_class_data_visibility (tree); +static bool arm_cxx_class_data_always_comdat (void); +static bool arm_cxx_use_aeabi_atexit (void); +static void arm_init_libfuncs (void); +static tree arm_build_builtin_va_list (void); +static void arm_expand_builtin_va_start (tree, rtx); +static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static void arm_option_override (void); +static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); +static bool arm_cannot_copy_insn_p (rtx); +static int arm_issue_rate (void); +static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; +static bool arm_output_addr_const_extra (FILE *, rtx); +static bool arm_allocate_stack_slots_for_args (void); +static bool arm_warn_func_return (tree); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); +static bool arm_scalar_mode_supported_p (enum machine_mode); +static bool arm_frame_pointer_required (void); 
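Looking back at arm-tables.opt above: each EnumValue record there pairs a user-visible -mcpu/-mtune, -march or -mfpu spelling with an internal identifier, and genopt.sh regenerates the whole file from arm-cores.def, arm-arches.def and arm-fpus.def. A C sketch of the same name-to-identifier lookup over a handful of the listed CPU entries; the trimmed table and the lookup_cpu function are illustrative only, not the generated option-handling code:

#include <stdio.h>
#include <string.h>

/* A few of the processor_type identifiers listed in arm-tables.opt,
   plus arm_none from arm-opts.h ("no processor specified").  */
enum processor_type { arm7tdmi, arm926ejs, cortexa9, cortexa15, cortexa53,
                      arm_none };

struct cpu_entry { const char *name; enum processor_type ident; };

/* Subset of the String()/Value() pairs from arm-tables.opt.  */
static const struct cpu_entry cpu_table[] = {
  { "arm7tdmi",   arm7tdmi  },
  { "arm926ej-s", arm926ejs },
  { "cortex-a9",  cortexa9  },
  { "cortex-a15", cortexa15 },
  { "cortex-a53", cortexa53 },
};

static enum processor_type lookup_cpu (const char *name)
{
  for (size_t i = 0; i < sizeof cpu_table / sizeof cpu_table[0]; i++)
    if (strcmp (cpu_table[i].name, name) == 0)
      return cpu_table[i].ident;
  return arm_none;
}

int main (void)
{
  printf ("-mcpu=cortex-a15 -> %d\n", (int) lookup_cpu ("cortex-a15"));
  printf ("-mcpu=bogus      -> %d (arm_none)\n", (int) lookup_cpu ("bogus"));
  return 0;
}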
+static bool arm_can_eliminate (const int, const int); +static void arm_asm_trampoline_template (FILE *); +static void arm_trampoline_init (rtx, tree, rtx); +static rtx arm_trampoline_adjust_address (rtx); +static rtx arm_pic_static_addr (rtx orig, rtx reg); +static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool arm_array_mode_supported_p (enum machine_mode, + unsigned HOST_WIDE_INT); +static enum machine_mode arm_preferred_simd_mode (enum machine_mode); +static bool arm_class_likely_spilled_p (reg_class_t); +static HOST_WIDE_INT arm_vector_alignment (const_tree type); +static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); +static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, + int misalignment, + bool is_packed); +static void arm_conditional_register_usage (void); +static reg_class_t arm_preferred_rename_class (reg_class_t rclass); +static unsigned int arm_autovectorize_vector_sizes (void); +static int arm_default_branch_cost (bool, bool); +static int arm_cortex_a5_branch_cost (bool, bool); +static int arm_cortex_m_branch_cost (bool, bool); + +static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); + +static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED); +static unsigned arm_add_stmt_cost (void *data, int count, + enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, + int misalign, + enum vect_cost_model_location where); + +static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value); +static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); + +/* Table of machine attributes. */ +static const struct attribute_spec arm_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 26 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 26 bit + addressing range. */ + { "short_call", 0, 0, false, true, true, NULL, false }, + /* Specify the procedure call conventions for a function. */ + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute, + false }, + /* Interrupt Service Routines have special prologue and epilogue requirements. */ + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute, + false }, + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute, + false }, + { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute, + false }, +#ifdef ARM_PE + /* ARM/PE has three new attributes: + interfacearm - ? + dllexport - for exporting a function/variable that will live in a dll + dllimport - for importing a function/variable from a dll + + Microsoft allows multiple declspecs in one __declspec, separating + them with spaces. We do NOT support this. Instead, use __declspec + multiple times. 
+ */ + { "dllimport", 0, 0, true, false, false, NULL, false }, + { "dllexport", 0, 0, true, false, false, NULL, false }, + { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute, + false }, +#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES + { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute, + false }, +#endif + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Initialize the GCC target structure. */ +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +#undef TARGET_MERGE_DECL_ATTRIBUTES +#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes +#endif + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address + +#undef TARGET_LRA_P +#define TARGET_LRA_P arm_lra_p + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arm_attribute_table + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arm_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END arm_file_end + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP NULL +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER arm_assemble_integer + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND arm_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE arm_option_override + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes + +#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES +#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST arm_adjust_cost + +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER arm_sched_reorder + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST arm_register_move_cost + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost + +#undef TARGET_ENCODE_SECTION_INFO +#ifdef ARM_PE +#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info +#else +#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info +#endif + +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding + +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arm_internal_label + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arm_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE arm_libcall_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_RTX_COSTS +#define 
TARGET_RTX_COSTS arm_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arm_address_cost + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p +#undef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + arm_autovectorize_vector_sizes + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arm_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arm_expand_builtin +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL arm_builtin_decl + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS arm_init_libfuncs + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG arm_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT arm_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN arm_warn_func_return + +#undef TARGET_DEFAULT_SHORT_ENUMS +#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums + +#undef TARGET_ALIGN_ANON_BITFIELD +#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield + +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +#undef TARGET_CXX_GUARD_TYPE +#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type + +#undef TARGET_CXX_GUARD_MASK_BIT +#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit + +#undef TARGET_CXX_GET_COOKIE_SIZE +#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size + +#undef TARGET_CXX_COOKIE_HAS_SIZE +#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size + +#undef TARGET_CXX_CDTOR_RETURNS_THIS +#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this + +#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE +#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline + +#undef TARGET_CXX_USE_AEABI_ATEXIT +#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit + +#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY +#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ + arm_cxx_determine_class_data_visibility + +#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT +#define 
TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB arm_return_in_msb + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arm_return_in_memory + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack + +#if ARM_UNWIND_INFO +#undef TARGET_ASM_UNWIND_EMIT +#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit + +/* EABI unwinding tables use a different format for the typeinfo tables. */ +#undef TARGET_ASM_TTYPE +#define TARGET_ASM_TTYPE arm_output_ttype + +#undef TARGET_ARM_EABI_UNWINDER +#define TARGET_ARM_EABI_UNWINDER true + +#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY +#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality + +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections +#endif /* ARM_UNWIND_INFO */ + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span + +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +#undef TARGET_HAVE_CONDITIONAL_EXECUTION +#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem + +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 4095 + +/* The minimum is set such that the total size of the block + for a particular anchor is -4088 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -4088 + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arm_issue_rate + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE arm_mangle_type + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class + +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE arm_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p + +#undef TARGET_VECTORIZE_BUILTINS 
+#define TARGET_VECTORIZE_BUILTINS + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + arm_builtin_vectorized_function + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment + +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + arm_vector_alignment_reachable + +#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + arm_builtin_support_vector_misalignment + +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS \ + arm_preferred_rename_class + +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ + arm_vectorize_vec_perm_const_ok + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + arm_builtin_vectorization_cost +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON \ + arm_canonicalize_comparison + +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset + +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Obstack for minipool constant handling. */ +static struct obstack minipool_obstack; +static char * minipool_startobj; + +/* The maximum number of insns skipped which + will be conditionalised if possible. */ +static int max_insns_skipped = 5; + +extern FILE * asm_out_file; + +/* True if we are currently building a constant table. */ +int making_const_table; + +/* The processor for which instructions should be scheduled. */ +enum processor_type arm_tune = arm_none; + +/* The current tuning set. */ +const struct tune_params *current_tune; + +/* Which floating point hardware to schedule for. */ +int arm_fpu_attr; + +/* Which floating popint hardware to use. */ +const struct arm_fpu_desc *arm_fpu_desc; + +/* Used for Thumb call_via trampolines. */ +rtx thumb_call_via_label[14]; +static int thumb_call_reg_needed; + +/* Bit values used to identify processor capabilities. */ +#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ +#define FL_ARCH3M (1 << 1) /* Extended multiply */ +#define FL_MODE26 (1 << 2) /* 26-bit mode support */ +#define FL_MODE32 (1 << 3) /* 32-bit mode support */ +#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */ +#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */ +#define FL_THUMB (1 << 6) /* Thumb aware */ +#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */ +#define FL_STRONG (1 << 8) /* StrongARM */ +#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ +#define FL_XSCALE (1 << 10) /* XScale */ +/* spare (1 << 11) */ +#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds + media instructions. */ +#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ +#define FL_WBUF (1 << 14) /* Schedule for write buffer ops. + Note: ARM6 & 7 derivatives only. */ +#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */ +#define FL_THUMB2 (1 << 16) /* Thumb-2. */ +#define FL_NOTM (1 << 17) /* Instructions not present in the 'M' + profile. */ +#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ +#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. 
*/ +#define FL_NEON (1 << 20) /* Neon instructions. */ +#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M + architecture. */ +#define FL_ARCH7 (1 << 22) /* Architecture 7. */ +#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ +#define FL_ARCH8 (1 << 24) /* Architecture 8. */ +#define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */ + +#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ +#define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ + +/* Flags that only effect tuning, not available instructions. */ +#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ + | FL_CO_PROC) + +#define FL_FOR_ARCH2 FL_NOTM +#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) +#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) +#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) +#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) +#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) +#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) +#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) +#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) +#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE +#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) +#define FL_FOR_ARCH6J FL_FOR_ARCH6 +#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) +#define FL_FOR_ARCH6Z FL_FOR_ARCH6 +#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K +#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) +#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) +#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) +#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) +#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV) +#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) +#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) +#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) + +/* The bits in this mask specify which + instructions we are allowed to generate. */ +static unsigned long insn_flags = 0; + +/* The bits in this mask specify which instruction scheduling options should + be used. */ +static unsigned long tune_flags = 0; + +/* The highest ARM architecture version supported by the + target. */ +enum base_architecture arm_base_arch = BASE_ARCH_0; + +/* The following are used in the arm.md file as equivalents to bits + in the above two flag variables. */ + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +int arm_arch3m = 0; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +int arm_arch4 = 0; + +/* Nonzero if this chip supports the ARM Architecture 4t extensions. */ +int arm_arch4t = 0; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +int arm_arch5 = 0; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +int arm_arch5e = 0; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +int arm_arch6 = 0; + +/* Nonzero if this chip supports the ARM 6K extensions. */ +int arm_arch6k = 0; + +/* Nonzero if instructions present in ARMv6-M can be used. */ +int arm_arch6m = 0; + +/* Nonzero if this chip supports the ARM 7 extensions. */ +int arm_arch7 = 0; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +int arm_arch_notm = 0; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +int arm_arch7em = 0; + +/* Nonzero if instructions present in ARMv8 can be used. */ +int arm_arch8 = 0; + +/* Nonzero if this chip can benefit from load scheduling. 
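+ (This is derived from FL_LDSCHED in the tuning flags; see arm_option_override below.)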
*/ +int arm_ld_sched = 0; + +/* Nonzero if this chip is a StrongARM. */ +int arm_tune_strongarm = 0; + +/* Nonzero if this chip supports Intel Wireless MMX technology. */ +int arm_arch_iwmmxt = 0; + +/* Nonzero if this chip supports Intel Wireless MMX2 technology. */ +int arm_arch_iwmmxt2 = 0; + +/* Nonzero if this chip is an XScale. */ +int arm_arch_xscale = 0; + +/* Nonzero if tuning for XScale */ +int arm_tune_xscale = 0; + +/* Nonzero if we want to tune for stores that access the write-buffer. + This typically means an ARM6 or ARM7 with MMU or MPU. */ +int arm_tune_wbuf = 0; + +/* Nonzero if tuning for Cortex-A9. */ +int arm_tune_cortex_a9 = 0; + +/* Nonzero if generating Thumb instructions. */ +int thumb_code = 0; + +/* Nonzero if generating Thumb-1 instructions. */ +int thumb1_code = 0; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. */ +int arm_cpp_interwork = 0; + +/* Nonzero if chip supports Thumb 2. */ +int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction. */ +int arm_arch_arm_hwdiv; +int arm_arch_thumb_hwdiv; + +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +int prefer_neon_for_64bits = 0; + +/* Nonzero if we shouldn't use literal pools. */ +bool arm_disable_literal_pool = false; + +/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, + we must report the mode of the memory reference from + TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ +enum machine_mode output_memory_reference_mode; + +/* The register number to be used for the PIC offset register. */ +unsigned arm_pic_register = INVALID_REGNUM; + +/* Set to 1 after arm_reorg has started. Reset to start at the start of + the next function. */ +static int after_arm_reorg = 0; + +enum arm_pcs arm_pcs_default; + +/* For an explanation of these variables, see final_prescan_insn below. */ +int arm_ccfsm_state; +/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ +enum arm_cond_code arm_current_cc; + +rtx arm_target_insn; +int arm_target_label; +/* The number of conditionally executed insns, including the current insn. */ +int arm_condexec_count = 0; +/* A bitmask specifying the patterns for the IT block. + Zero means do not output an IT block before this insn. */ +int arm_condexec_mask = 0; +/* The number of bits used in arm_condexec_mask. */ +int arm_condexec_masklen = 0; + +/* Nonzero if chip supports the ARMv8 CRC instructions. */ +int arm_arch_crc = 0; + +/* The condition codes of the ARM, and the inverse function. */ +static const char * const arm_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +int arm_regs_in_sequence[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; + +#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") +#define streq(string1, string2) (strcmp (string1, string2) == 0) + +#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ + | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ + | (1 << PIC_OFFSET_TABLE_REGNUM))) + +/* Initialization code. 
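+ The tables below (all_cores, all_architectures, all_fpus) are built from + arm-cores.def, arm-arches.def and arm-fpus.def; arm_option_override selects + entries from them according to -mcpu, -march, -mtune and -mfpu.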
*/ + +struct processors +{ + const char *const name; + enum processor_type core; + const char *arch; + enum base_architecture base_arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + + +#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 +#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ + prefetch_slots, \ + l1_size, \ + l1_line_size + +/* arm generic vectorizer costs. */ +static const +struct cpu_vec_costs arm_default_vec_cost = { + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 1, /* vec_unalign_load_cost. */ + 1, /* vec_unalign_store_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */ +#include "aarch-cost-tables.h" + + + +const struct cpu_cost_table cortexa9_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (2), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (2), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (3), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (4), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (4), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + COSTS_N_INSNS (2), /* load_sign_extend. */ + COSTS_N_INSNS (2), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (5), /* loadf. */ + COSTS_N_INSNS (5), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (2), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (14), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (24), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. 
*/ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + + +const struct cpu_cost_table cortexa7_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + + { + /* MULT SImode */ + { + 0, /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (7) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (1), /* extend. */ + 0, /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* load. */ + COSTS_N_INSNS (1), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (1), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (2), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (1), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (1), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (30), /* div. */ + COSTS_N_INSNS (6), /* mult. */ + COSTS_N_INSNS (10), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa12_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. 
*/ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (3), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (3), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (3), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + 0, /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ + 0 /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa15_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (3), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (4), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + 0, /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + COSTS_N_INSNS (1), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + 0 /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. 
*/ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (5), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table v7m_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + 0, /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (3), /* extend_add. */ + COSTS_N_INSNS (8) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + 0, /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 1, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (5), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct tune_params arm_slowmul_tune = +{ + arm_slowmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 3, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. 
*/ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_fastmul_tune = +{ + arm_fastmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* StrongARM has early execution of branches, so a sequence that is worth + skipping is shorter. Set max_insns_skipped to a lower value. */ + +const struct tune_params arm_strongarm_tune = +{ + arm_fastmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 3, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_xscale_tune = +{ + arm_xscale_rtx_costs, + NULL, + xscale_sched_adjust_cost, + 2, /* Constant limit. */ + 3, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_9e_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_v6t2_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* Generic Cortex tuning. Use more specific tunings if appropriate. */ +const struct tune_params arm_cortex_tune = +{ + arm_9e_rtx_costs, + &generic_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a7_tune = +{ + arm_9e_rtx_costs, + &cortexa7_extra_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a15_tune = +{ + arm_9e_rtx_costs, + &cortexa15_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. 
*/ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a53_tune = +{ + arm_9e_rtx_costs, + &cortexa53_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a57_tune = +{ + arm_9e_rtx_costs, + &cortexa57_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* Branches can be dual-issued on Cortex-A5, so conditional execution is + less appealing. Set max_insns_skipped to a low value. */ + +const struct tune_params arm_cortex_a5_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 1, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_cortex_a5_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a9_tune = +{ + arm_9e_rtx_costs, + &cortexa9_extra_costs, + cortex_a9_sched_adjust_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a12_tune = +{ + arm_9e_rtx_costs, + &cortexa12_extra_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single + cycle to execute each. An LDR from the constant pool also takes two cycles + to execute, but mildly increases pipelining opportunity (consecutive + loads/stores can be pipelined together, saving one cycle), and may also + improve icache utilisation. Hence we prefer the constant pool for such + processors. */ + +const struct tune_params arm_v7m_tune = +{ + arm_9e_rtx_costs, + &v7m_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_cortex_m_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. 
*/ +}; + +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than + arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ +const struct tune_params arm_v6m_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_fa726te_tune = +{ + arm_9e_rtx_costs, + NULL, + fa726te_sched_adjust_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + + +/* Not all of these give usefully different compilation alternatives, + but there is no simple way of generalizing them. */ +static const struct processors all_cores[] = +{ + /* ARM Cores */ +#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \ + FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, +#include "arm-cores.def" +#undef ARM_CORE + {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} +}; + +static const struct processors all_architectures[] = +{ + /* ARM Architectures */ + /* We don't specify tuning costs here as it will be figured out + from the core. */ + +#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \ + {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL}, +#include "arm-arches.def" +#undef ARM_ARCH + {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} +}; + + +/* These are populated as commandline arguments are processed, or NULL + if not specified. */ +static const struct processors *arm_selected_arch; +static const struct processors *arm_selected_cpu; +static const struct processors *arm_selected_tune; + +/* The name of the preprocessor macro to define for this architecture. */ + +char arm_arch_name[] = "__ARM_ARCH_0UNK__"; + +/* Available values for -mfpu=. */ + +static const struct arm_fpu_desc all_fpus[] = +{ +#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \ + { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO }, +#include "arm-fpus.def" +#undef ARM_FPU +}; + + +/* Supported TLS relocations. */ + +enum tls_reloc { + TLS_GD32, + TLS_LDM32, + TLS_LDO32, + TLS_IE32, + TLS_LE32, + TLS_DESCSEQ /* GNU scheme */ +}; + +/* The maximum number of insns to be used when loading a constant. */ +inline static int +arm_constant_limit (bool size_p) +{ + return size_p ? 1 : current_tune->constant_limit; +} + +/* Emit an insn that's a simple single-set. Both the operands must be known + to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* Return the number of bits set in VALUE. */ +static unsigned +bit_count (unsigned long value) +{ + unsigned long count = 0; + + while (value) + { + count++; + value &= value - 1; /* Clear the least-significant set bit. */ + } + + return count; +} + +typedef struct +{ + enum machine_mode mode; + const char *name; +} arm_fixed_mode_set; + +/* A small helper for setting fixed-point library libfuncs. 
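+ For example, FUNCNAME "ssadd", MODENAME "sa" and NUM_SUFFIX 3 yield the + libcall name "__gnu_ssaddsa3".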
*/ + +static void +arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode, + const char *funcname, const char *modename, + int num_suffix) +{ + char buffer[50]; + + if (num_suffix == 0) + sprintf (buffer, "__gnu_%s%s", funcname, modename); + else + sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix); + + set_optab_libfunc (optable, mode, buffer); +} + +static void +arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to, + enum machine_mode from, const char *funcname, + const char *toname, const char *fromname) +{ + char buffer[50]; + const char *maybe_suffix_2 = ""; + + /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */ + if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to) + && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to) + && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to)) + maybe_suffix_2 = "2"; + + sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname, + maybe_suffix_2); + + set_conv_libfunc (optable, to, from, buffer); +} + +/* Set up library functions unique to ARM. */ + +static void +arm_init_libfuncs (void) +{ + /* For Linux, we have access to kernel support for atomic operations. */ + if (arm_abi == ARM_ABI_AAPCS_LINUX) + init_sync_libfuncs (2 * UNITS_PER_WORD); + + /* There are no special library functions unless we are using the + ARM BPABI. */ + if (!TARGET_BPABI) + return; + + /* The functions below are described in Section 4 of the "Run-Time + ABI for the ARM architecture", Version 1.0. */ + + /* Double-precision floating-point arithmetic. Table 2. */ + set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); + set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); + set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); + set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); + set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); + + /* Double-precision comparisons. Table 3. */ + set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); + set_optab_libfunc (ne_optab, DFmode, NULL); + set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); + set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); + set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); + set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); + set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); + + /* Single-precision floating-point arithmetic. Table 4. */ + set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); + set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); + set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); + set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); + set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); + + /* Single-precision comparisons. Table 5. */ + set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq"); + set_optab_libfunc (ne_optab, SFmode, NULL); + set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt"); + set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple"); + set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge"); + set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt"); + set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun"); + + /* Floating-point to integer conversions. Table 6. 
*/ + set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz"); + set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz"); + set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz"); + set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz"); + set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz"); + set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz"); + set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz"); + set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz"); + + /* Conversions between floating types. Table 7. */ + set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f"); + set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d"); + + /* Integer to floating-point conversions. Table 8. */ + set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d"); + set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d"); + set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d"); + set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d"); + set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f"); + set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f"); + set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f"); + set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f"); + + /* Long long. Table 9. */ + set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul"); + set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod"); + set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl"); + set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr"); + set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr"); + set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp"); + set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp"); + + /* Integer (32/32->32) division. \S 4.3.1. */ + set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod"); + set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod"); + + /* The divmod functions are designed so that they can be used for + plain division, even though they return both the quotient and the + remainder. The quotient is returned in the usual location (i.e., + r0 for SImode, {r0, r1} for DImode), just as would be expected + for an ordinary division routine. Because the AAPCS calling + conventions specify that all of { r0, r1, r2, r3 } are + callee-saved registers, there is no need to tell the compiler + explicitly that those registers are clobbered by these + routines. */ + set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod"); + + /* For SImode division the ABI provides div-without-mod routines, + which are faster. */ + set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv"); + set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv"); + + /* We don't have mod libcalls. Fortunately gcc knows how to use the + divmod libcalls instead. */ + set_optab_libfunc (smod_optab, DImode, NULL); + set_optab_libfunc (umod_optab, DImode, NULL); + set_optab_libfunc (smod_optab, SImode, NULL); + set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting the SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? 
"__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } + + /* Use names prefixed with __gnu_ for fixed-point helper functions. */ + { + const arm_fixed_mode_set fixed_arith_modes[] = + { + { QQmode, "qq" }, + { UQQmode, "uqq" }, + { HQmode, "hq" }, + { UHQmode, "uhq" }, + { SQmode, "sq" }, + { USQmode, "usq" }, + { DQmode, "dq" }, + { UDQmode, "udq" }, + { TQmode, "tq" }, + { UTQmode, "utq" }, + { HAmode, "ha" }, + { UHAmode, "uha" }, + { SAmode, "sa" }, + { USAmode, "usa" }, + { DAmode, "da" }, + { UDAmode, "uda" }, + { TAmode, "ta" }, + { UTAmode, "uta" } + }; + const arm_fixed_mode_set fixed_conv_modes[] = + { + { QQmode, "qq" }, + { UQQmode, "uqq" }, + { HQmode, "hq" }, + { UHQmode, "uhq" }, + { SQmode, "sq" }, + { USQmode, "usq" }, + { DQmode, "dq" }, + { UDQmode, "udq" }, + { TQmode, "tq" }, + { UTQmode, "utq" }, + { HAmode, "ha" }, + { UHAmode, "uha" }, + { SAmode, "sa" }, + { USAmode, "usa" }, + { DAmode, "da" }, + { UDAmode, "uda" }, + { TAmode, "ta" }, + { UTAmode, "uta" }, + { QImode, "qi" }, + { HImode, "hi" }, + { SImode, "si" }, + { DImode, "di" }, + { TImode, "ti" }, + { SFmode, "sf" }, + { DFmode, "df" } + }; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++) + { + arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode, + "add", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode, + "ssadd", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode, + "usadd", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode, + "sub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode, + "sssub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode, + "ussub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode, + "mul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode, + "ssmul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode, + "usmul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode, + "div", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode, + "udiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode, + "ssdiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode, + "usdiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode, + "neg", 
fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode, + "ssneg", fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode, + "usneg", fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode, + "ashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode, + "ashr", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode, + "lshr", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode, + "ssashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode, + "usashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode, + "cmp", fixed_arith_modes[i].name, 2); + } + + for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++) + for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++) + { + if (i == j + || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode) + && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode))) + continue; + + arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "fract", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (satfract_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "satfract", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (fractuns_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "fractuns", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (satfractuns_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "satfractuns", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + } + } + + if (TARGET_AAPCS_BASED) + synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); +} + +/* On AAPCS systems, this is the "struct __va_list". */ +static GTY(()) tree va_list_type; + +/* Return the type to use as __builtin_va_list. */ +static tree +arm_build_builtin_va_list (void) +{ + tree va_list_name; + tree ap_field; + + if (!TARGET_AAPCS_BASED) + return std_build_builtin_va_list (); + + /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type + defined as: + + struct __va_list + { + void *__ap; + }; + + The C Library ABI further reinforces this definition in \S + 4.1. + + We must follow this definition exactly. The structure tag + name is visible in C++ mangled names, and thus forms a part + of the ABI. The field name may be used by people who + #include . */ + /* Create the type. */ + va_list_type = lang_hooks.types.make_type (RECORD_TYPE); + /* Give it the required name. */ + va_list_name = build_decl (BUILTINS_LOCATION, + TYPE_DECL, + get_identifier ("__va_list"), + va_list_type); + DECL_ARTIFICIAL (va_list_name) = 1; + TYPE_NAME (va_list_type) = va_list_name; + TYPE_STUB_DECL (va_list_type) = va_list_name; + /* Create the __ap field. */ + ap_field = build_decl (BUILTINS_LOCATION, + FIELD_DECL, + get_identifier ("__ap"), + ptr_type_node); + DECL_ARTIFICIAL (ap_field) = 1; + DECL_FIELD_CONTEXT (ap_field) = va_list_type; + TYPE_FIELDS (va_list_type) = ap_field; + /* Compute its layout. */ + layout_type (va_list_type); + + return va_list_type; +} + +/* Return an expression of type "void *" pointing to the next + available argument in a variable-argument list. 
VALIST is the + user-level va_list object, of type __builtin_va_list. */ +static tree +arm_extract_valist_ptr (tree valist) +{ + if (TREE_TYPE (valist) == error_mark_node) + return error_mark_node; + + /* On an AAPCS target, the pointer is stored within "struct + va_list". */ + if (TARGET_AAPCS_BASED) + { + tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); + valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), + valist, ap_field, NULL_TREE); + } + + return valist; +} + +/* Implement TARGET_EXPAND_BUILTIN_VA_START. */ +static void +arm_expand_builtin_va_start (tree valist, rtx nextarg) +{ + valist = arm_extract_valist_ptr (valist); + std_expand_builtin_va_start (valist, nextarg); +} + +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ +static tree +arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + valist = arm_extract_valist_ptr (valist); + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); +} + +/* Fix up any incompatible options that the user has specified. */ +static void +arm_option_override (void) +{ + if (global_options_set.x_arm_arch_option) + arm_selected_arch = &all_architectures[arm_arch_option]; + + if (global_options_set.x_arm_cpu_option) + { + arm_selected_cpu = &all_cores[(int) arm_cpu_option]; + arm_selected_tune = &all_cores[(int) arm_cpu_option]; + } + + if (global_options_set.x_arm_tune_option) + arm_selected_tune = &all_cores[(int) arm_tune_option]; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + if (arm_selected_arch) + { + if (arm_selected_cpu) + { + /* Check for conflict between mcpu and march. */ + if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) + { + warning (0, "switch -mcpu=%s conflicts with -march=%s switch", + arm_selected_cpu->name, arm_selected_arch->name); + /* -march wins for code generation. + -mcpu wins for default tuning. */ + if (!arm_selected_tune) + arm_selected_tune = arm_selected_cpu; + + arm_selected_cpu = arm_selected_arch; + } + else + /* -mcpu wins. */ + arm_selected_arch = NULL; + } + else + /* Pick a CPU based on the architecture. */ + arm_selected_cpu = arm_selected_arch; + } + + /* If the user did not specify a processor, choose one for them. */ + if (!arm_selected_cpu) + { + const struct processors * sel; + unsigned int sought; + + arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; + if (!arm_selected_cpu->name) + { +#ifdef SUBTARGET_CPU_DEFAULT + /* Use the subtarget default CPU if none was specified by + configure. */ + arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT]; +#endif + /* Default to ARM6. */ + if (!arm_selected_cpu->name) + arm_selected_cpu = &all_cores[arm6]; + } + + sel = arm_selected_cpu; + insn_flags = sel->flags; + + /* Now check to see if the user has specified some command line + switch that require certain abilities from the cpu. */ + sought = 0; + + if (TARGET_INTERWORK || TARGET_THUMB) + { + sought |= (FL_THUMB | FL_MODE32); + + /* There are no ARM processors that support both APCS-26 and + interworking. Therefore we force FL_MODE26 to be removed + from insn_flags here (if it was set), so that the search + below will always be able to find a compatible processor. */ + insn_flags &= ~FL_MODE26; + } + + if (sought != 0 && ((sought & insn_flags) != sought)) + { + /* Try to locate a CPU type that supports all of the abilities + of the default CPU, plus the extra abilities requested by + the user. 
*/ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == (sought | insn_flags)) + break; + + if (sel->name == NULL) + { + unsigned current_bit_count = 0; + const struct processors * best_fit = NULL; + + /* Ideally we would like to issue an error message here + saying that it was not possible to find a CPU compatible + with the default CPU, but which also supports the command + line options specified by the programmer, and so they + ought to use the -mcpu= command line option to + override the default CPU type. + + If we cannot find a cpu that has both the + characteristics of the default cpu and the given + command line options we scan the array again looking + for a best match. */ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == sought) + { + unsigned count; + + count = bit_count (sel->flags & insn_flags); + + if (count >= current_bit_count) + { + best_fit = sel; + current_bit_count = count; + } + } + + gcc_assert (best_fit); + sel = best_fit; + } + + arm_selected_cpu = sel; + } + } + + gcc_assert (arm_selected_cpu); + /* The selected cpu may be an architecture, so lookup tuning by core ID. */ + if (!arm_selected_tune) + arm_selected_tune = &all_cores[arm_selected_cpu->core]; + + sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch); + insn_flags = arm_selected_cpu->flags; + arm_base_arch = arm_selected_cpu->base_arch; + + arm_tune = arm_selected_tune->core; + tune_flags = arm_selected_tune->flags; + current_tune = arm_selected_tune->tune; + + /* Make sure that the processor choice does not conflict with any of the + other command line choices. */ + if (TARGET_ARM && !(insn_flags & FL_NOTM)) + error ("target CPU does not support ARM mode"); + + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ + if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) + { + warning (0, "target CPU does not support interworking" ); + target_flags &= ~MASK_INTERWORK; + } + + if (TARGET_THUMB && !(insn_flags & FL_THUMB)) + { + warning (0, "target CPU does not support THUMB instructions"); + target_flags &= ~MASK_THUMB; + } + + if (TARGET_APCS_FRAME && TARGET_THUMB) + { + /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ + target_flags &= ~MASK_APCS_FRAME; + } + + /* Callee super interworking implies thumb interworking. Adding + this to the flags here simplifies the logic elsewhere. */ + if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) + target_flags |= MASK_INTERWORK; + + /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done + from here where no function is being compiled currently. */ + if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) + warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); + + if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) + warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); + + if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) + { + warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); + target_flags |= MASK_APCS_FRAME; + } + + if (TARGET_POKE_FUNCTION_NAME) + target_flags |= MASK_APCS_FRAME; + + if (TARGET_APCS_REENT && flag_pic) + error ("-fpic and -mapcs-reent are incompatible"); + + if (TARGET_APCS_REENT) + warning (0, "APCS reentrant code not supported. Ignored"); + + /* If this target is normally configured to use APCS frames, warn if they + are turned off and debugging is turned on. 
*/ + if (TARGET_ARM + && write_symbols != NO_DEBUG + && !TARGET_APCS_FRAME + && (TARGET_DEFAULT & MASK_APCS_FRAME)) + warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); + + if (TARGET_APCS_FLOAT) + warning (0, "passing floating point arguments in fp regs not yet supported"); + + if (TARGET_LITTLE_WORDS) + warning (OPT_Wdeprecated, "% is deprecated and " + "will be removed in a future release"); + + /* Initialize boolean versions of the flags, for use in the arm.md file. */ + arm_arch3m = (insn_flags & FL_ARCH3M) != 0; + arm_arch4 = (insn_flags & FL_ARCH4) != 0; + arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); + arm_arch5 = (insn_flags & FL_ARCH5) != 0; + arm_arch5e = (insn_flags & FL_ARCH5E) != 0; + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; + arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch6m = arm_arch6 && !arm_arch_notm; + arm_arch7 = (insn_flags & FL_ARCH7) != 0; + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; + arm_arch8 = (insn_flags & FL_ARCH8) != 0; + arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; + arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; + + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; + thumb_code = TARGET_ARM == 0; + thumb1_code = TARGET_THUMB1 != 0; + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; + arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0; + arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; + arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + arm_arch_crc = (insn_flags & FL_CRC32) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + + if (!TARGET_THUMB2) + arm_restrict_it = 0; + + /* If we are not using the default (ARM mode) section anchor offset + ranges, then set the correct ranges now. */ + if (TARGET_THUMB1) + { + /* Thumb-1 LDR instructions cannot have negative offsets. + Permissible positive offset ranges are 5-bit (for byte loads), + 6-bit (for halfword loads), or 7-bit (for word loads). + Empirical results suggest a 7-bit anchor range gives the best + overall code size. */ + targetm.min_anchor_offset = 0; + targetm.max_anchor_offset = 127; + } + else if (TARGET_THUMB2) + { + /* The minimum is set such that the total size of the block + for a particular anchor is 248 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ + targetm.min_anchor_offset = -248; + targetm.max_anchor_offset = 4095; + } + + /* V5 code we generate is completely interworking capable, so we turn off + TARGET_INTERWORK here to avoid many tests later on. */ + + /* XXX However, we must pass the right pre-processor defines to CPP + or GLD can get confused. This is a hack. 
*/ + if (TARGET_INTERWORK) + arm_cpp_interwork = 1; + + if (arm_arch5) + target_flags &= ~MASK_INTERWORK; + + if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) + error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); + + if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) + error ("iwmmxt abi requires an iwmmxt capable cpu"); + + if (!global_options_set.x_arm_fpu_index) + { + const char *target_fpu_name; + bool ok; + +#ifdef FPUTYPE_DEFAULT + target_fpu_name = FPUTYPE_DEFAULT; +#else + target_fpu_name = "vfp"; +#endif + + ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index, + CL_TARGET); + gcc_assert (ok); + } + + arm_fpu_desc = &all_fpus[arm_fpu_index]; + + switch (arm_fpu_desc->model) + { + case ARM_FP_MODEL_VFP: + arm_fpu_attr = FPU_VFP; + break; + + default: + gcc_unreachable(); + } + + if (TARGET_AAPCS_BASED) + { + if (TARGET_CALLER_INTERWORKING) + error ("AAPCS does not support -mcaller-super-interworking"); + else + if (TARGET_CALLEE_INTERWORKING) + error ("AAPCS does not support -mcallee-super-interworking"); + } + + /* iWMMXt and NEON are incompatible. */ + if (TARGET_IWMMXT && TARGET_NEON) + error ("iWMMXt and NEON are incompatible"); + + /* iWMMXt unsupported under Thumb mode. */ + if (TARGET_THUMB && TARGET_IWMMXT) + error ("iWMMXt unsupported under Thumb mode"); + + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + + /* If soft-float is specified then don't use FPU. */ + if (TARGET_SOFT_FLOAT) + arm_fpu_attr = FPU_NONE; + + if (TARGET_AAPCS_BASED) + { + if (arm_abi == ARM_ABI_IWMMXT) + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; + else if (arm_float_abi == ARM_FLOAT_ABI_HARD + && TARGET_HARD_FLOAT + && TARGET_VFP) + arm_pcs_default = ARM_PCS_AAPCS_VFP; + else + arm_pcs_default = ARM_PCS_AAPCS; + } + else + { + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) + sorry ("-mfloat-abi=hard and VFP"); + + if (arm_abi == ARM_ABI_APCS) + arm_pcs_default = ARM_PCS_APCS; + else + arm_pcs_default = ARM_PCS_ATPCS; + } + + /* For arm2/3 there is no need to do any scheduling if we are doing + software floating-point. */ + if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0) + flag_schedule_insns = flag_schedule_insns_after_reload = 0; + + /* Use the cp15 method if it is available. */ + if (target_thread_pointer == TP_AUTO) + { + if (arm_arch6k && !TARGET_THUMB1) + target_thread_pointer = TP_CP15; + else + target_thread_pointer = TP_SOFT; + } + + if (TARGET_HARD_TP && TARGET_THUMB1) + error ("can not use -mtp=cp15 with 16-bit Thumb"); + + /* Override the default structure alignment for AAPCS ABI. */ + if (!global_options_set.x_arm_structure_size_boundary) + { + if (TARGET_AAPCS_BASED) + arm_structure_size_boundary = 8; + } + else + { + if (arm_structure_size_boundary != 8 + && arm_structure_size_boundary != 32 + && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64)) + { + if (ARM_DOUBLEWORD_ALIGN) + warning (0, + "structure size boundary can only be set to 8, 32 or 64"); + else + warning (0, "structure size boundary can only be set to 8 or 32"); + arm_structure_size_boundary + = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY); + } + } + + if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) + { + error ("RTP PIC is incompatible with Thumb"); + flag_pic = 0; + } + + /* If stack checking is disabled, we can use r10 as the PIC register, + which keeps r9 available. The EABI specifies r9 as the PIC register. 
*/ + if (flag_pic && TARGET_SINGLE_PIC_BASE) + { + if (TARGET_VXWORKS_RTP) + warning (0, "RTP PIC is incompatible with -msingle-pic-base"); + arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10; + } + + if (flag_pic && TARGET_VXWORKS_RTP) + arm_pic_register = 9; + + if (arm_pic_register_string != NULL) + { + int pic_register = decode_reg_name (arm_pic_register_string); + + if (!flag_pic) + warning (0, "-mpic-register= is useless without -fpic"); + + /* Prevent the user from choosing an obviously stupid PIC register. */ + else if (pic_register < 0 || call_used_regs[pic_register] + || pic_register == HARD_FRAME_POINTER_REGNUM + || pic_register == STACK_POINTER_REGNUM + || pic_register >= PC_REGNUM + || (TARGET_VXWORKS_RTP + && (unsigned int) pic_register != arm_pic_register)) + error ("unable to use '%s' for PIC register", arm_pic_register_string); + else + arm_pic_register = pic_register; + } + + if (TARGET_VXWORKS_RTP + && !global_options_set.x_arm_pic_data_is_text_relative) + arm_pic_data_is_text_relative = 0; + + /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ + if (fix_cm3_ldrd == 2) + { + if (arm_selected_cpu->core == cortexm3) + fix_cm3_ldrd = 1; + else + fix_cm3_ldrd = 0; + } + + /* Enable -munaligned-access by default for + - all ARMv6 architecture-based processors + - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. + - ARMv8 architecture-base processors. + + Disable -munaligned-access by default for + - all pre-ARMv6 architecture-based processors + - ARMv6-M architecture-based processors. */ + + if (unaligned_access == 2) + { + if (arm_arch6 && (arm_arch_notm || arm_arch7)) + unaligned_access = 1; + else + unaligned_access = 0; + } + else if (unaligned_access == 1 + && !(arm_arch6 && (arm_arch_notm || arm_arch7))) + { + warning (0, "target CPU does not support unaligned accesses"); + unaligned_access = 0; + } + + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ + flag_schedule_insns = 0; + } + + if (optimize_size) + { + /* If optimizing for size, bump the number of instructions that we + are prepared to conditionally execute (even on a StrongARM). */ + max_insns_skipped = 6; + } + else + max_insns_skipped = current_tune->max_insns_skipped; + + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ + if (flag_reorder_blocks_and_partition) + { + inform (input_location, + "-freorder-blocks-and-partition not supported on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + if (flag_pic) + /* Hoisting PIC address calculations more aggressively provides a small, + but measurable, size reduction for PIC code. Therefore, we decrease + the bar for unrestricted expression hoisting to the cost of PIC address + calculation, which is 2 instructions. */ + maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + /* ARM EABI defaults to strict volatile bitfields. */ + if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0 + && abi_version_at_least(2)) + flag_strict_volatile_bitfields = 1; + + /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed + it beneficial (signified by setting num_prefetch_slots to 1 or more.) 
*/ + if (flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && optimize >= 3 + && current_tune->num_prefetch_slots > 0) + flag_prefetch_loop_arrays = 1; + + /* Set up parameters to be used in prefetching algorithm. Do not override the + defaults unless we are tuning for a core we have researched values for. */ + if (current_tune->num_prefetch_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + current_tune->num_prefetch_slots, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_line_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + current_tune->l1_cache_line_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, + current_tune->l1_cache_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Use Neon to perform 64-bits operations rather than core + registers. */ + prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits; + if (use_neon_for_64bits == 1) + prefer_neon_for_64bits = true; + + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Disable shrink-wrap when optimizing function for size, since it tends to + generate additional returns. */ + if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) + flag_shrink_wrap = false; + /* TBD: Dwarf info for apcs frame is not handled yet. */ + if (TARGET_APCS_FRAME) + flag_shrink_wrap = false; + + /* We only support -mslow-flash-data on armv7-m targets. */ + if (target_slow_flash_data + && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) + || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) + error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); + + /* Currently, for slow flash data, we just disable literal pools. */ + if (target_slow_flash_data) + arm_disable_literal_pool = true; + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); +} + +static void +arm_add_gc_roots (void) +{ + gcc_obstack_init(&minipool_obstack); + minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0); +} + +/* A table of known ARM exception types. + For use with the interrupt function attribute. */ + +typedef struct +{ + const char *const arg; + const unsigned long return_value; +} +isr_attribute_arg; + +static const isr_attribute_arg isr_attribute_args [] = +{ + { "IRQ", ARM_FT_ISR }, + { "irq", ARM_FT_ISR }, + { "FIQ", ARM_FT_FIQ }, + { "fiq", ARM_FT_FIQ }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "UNDEF", ARM_FT_EXCEPTION }, + { "undef", ARM_FT_EXCEPTION }, + { "SWI", ARM_FT_EXCEPTION }, + { "swi", ARM_FT_EXCEPTION }, + { NULL, ARM_FT_NORMAL } +}; + +/* Returns the (interrupt) function type of the current + function, or ARM_FT_UNKNOWN if the type cannot be determined. */ + +static unsigned long +arm_isr_value (tree argument) +{ + const isr_attribute_arg * ptr; + const char * arg; + + if (!arm_arch_notm) + return ARM_FT_NORMAL | ARM_FT_STACKALIGN; + + /* No argument - default to IRQ. */ + if (argument == NULL_TREE) + return ARM_FT_ISR; + + /* Get the value of the argument. 
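+     For example, a handler declared (purely for illustration) as
+
+       void handler (void) __attribute__ ((interrupt ("IRQ")));
+
+     arrives here with TREE_VALUE (argument) being the STRING_CST
+     "IRQ", which the table above maps to ARM_FT_ISR.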
*/ + if (TREE_VALUE (argument) == NULL_TREE + || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) + return ARM_FT_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (argument)); + + /* Check it against the list of known arguments. */ + for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->return_value; + + /* An unrecognized interrupt type. */ + return ARM_FT_UNKNOWN; +} + +/* Computes the type of the current function. */ + +static unsigned long +arm_compute_func_type (void) +{ + unsigned long type = ARM_FT_UNKNOWN; + tree a; + tree attr; + + gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); + + /* Decide if the current function is volatile. Such functions + never return, and many memory cycles can be saved by not storing + register values that will never be needed again. This optimization + was added to speed up context switching in a kernel application. */ + if (optimize > 0 + && (TREE_NOTHROW (current_function_decl) + || !(flag_unwind_tables + || (flag_exceptions + && arm_except_unwind_info (&global_options) != UI_SJLJ))) + && TREE_THIS_VOLATILE (current_function_decl)) + type |= ARM_FT_VOLATILE; + + if (cfun->static_chain_decl != NULL) + type |= ARM_FT_NESTED; + + attr = DECL_ATTRIBUTES (current_function_decl); + + a = lookup_attribute ("naked", attr); + if (a != NULL_TREE) + type |= ARM_FT_NAKED; + + a = lookup_attribute ("isr", attr); + if (a == NULL_TREE) + a = lookup_attribute ("interrupt", attr); + + if (a == NULL_TREE) + type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; + else + type |= arm_isr_value (TREE_VALUE (a)); + + return type; +} + +/* Returns the type of the current function. */ + +unsigned long +arm_current_func_type (void) +{ + if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) + cfun->machine->func_type = arm_compute_func_type (); + + return cfun->machine->func_type; +} + +bool +arm_allocate_stack_slots_for_args (void) +{ + /* Naked functions should not allocate stack slots for arguments. */ + return !IS_NAKED (arm_current_func_type ()); +} + +static bool +arm_warn_func_return (tree decl) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE; +} + + +/* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts. + + On the ARM, (if r8 is the static chain regnum, and remembering that + referencing pc adds an offset of 8) the trampoline looks like: + ldr r8, [pc, #0] + ldr pc, [pc] + .word static chain value + .word function's address + XXX FIXME: When the trampoline returns, r8 will be clobbered. */ + +static void +arm_asm_trampoline_template (FILE *f) +{ + if (TARGET_ARM) + { + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); + } + else if (TARGET_THUMB2) + { + /* The Thumb-2 trampoline is similar to the arm implementation. + Unlike 16-bit Thumb, we enter the stub in thumb mode. 
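+     A sketch of the resulting stub (offsets match the stores done by
+     arm_trampoline_init below; recall that reading PC in Thumb state
+     yields the address of the current instruction plus 4):
+
+       0:   ldr.w  <static chain reg>, [pc, #4]   @ loads the word at 8
+       4:   ldr.w  pc, [pc, #4]                   @ loads the word at 12
+       8:   .word  <static chain value>
+       12:  .word  <address of the nested function>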
*/ + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", + STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); + } + else + { + ASM_OUTPUT_ALIGN (f, 2); + fprintf (f, "\t.code\t16\n"); + fprintf (f, ".Ltrampoline_start:\n"); + asm_fprintf (f, "\tpush\t{r0, r1}\n"); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); + asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); + } + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. */ + +static void +arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, a_tramp; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); + emit_move_insn (mem, chain_value); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); + fnaddr = XEXP (DECL_RTL (fndecl), 0); + emit_move_insn (mem, fnaddr); + + a_tramp = XEXP (m_tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, + plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode); +} + +/* Thumb trampolines should be entered in thumb mode, so set + the bottom bit of the address. */ + +static rtx +arm_trampoline_adjust_address (rtx addr) +{ + if (TARGET_THUMB) + addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, + NULL, 0, OPTAB_LIB_WIDEN); + return addr; +} + +/* Return 1 if it is possible to return using a single instruction. + If SIBLING is non-null, this is a test for a return before a sibling + call. SIBLING is the call insn, so we can examine its register usage. */ + +int +use_return_insn (int iscond, rtx sibling) +{ + int regno; + unsigned int func_type; + unsigned long saved_int_regs; + unsigned HOST_WIDE_INT stack_adjust; + arm_stack_offsets *offsets; + + /* Never use a return instruction before reload has run. */ + if (!reload_completed) + return 0; + + func_type = arm_current_func_type (); + + /* Naked, volatile and stack alignment functions need special + consideration. */ + if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) + return 0; + + /* So do interrupt functions that use the frame pointer and Thumb + interrupt functions. */ + if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) + return 0; + + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + return 0; + + offsets = arm_get_frame_offsets (); + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + + /* As do variadic functions. */ + if (crtl->args.pretend_args_size + || cfun->machine->uses_anonymous_args + /* Or if the function calls __builtin_eh_return () */ + || crtl->calls_eh_return + /* Or if the function calls alloca */ + || cfun->calls_alloca + /* Or if there is a stack adjustment. However, if the stack pointer + is saved on the stack, we can use a pre-incrementing stack load. 
*/ + || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed + && stack_adjust == 4))) + return 0; + + saved_int_regs = offsets->saved_regs_mask; + + /* Unfortunately, the insn + + ldmib sp, {..., sp, ...} + + triggers a bug on most SA-110 based devices, such that the stack + pointer won't be correctly restored if the instruction takes a + page fault. We work around this problem by popping r3 along with + the other registers, since that is never slower than executing + another instruction. + + We test for !arm_arch5 here, because code for any architecture + less than this could potentially be run on one of the buggy + chips. */ + if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM) + { + /* Validate that r3 is a call-clobbered register (always true in + the default abi) ... */ + if (!call_used_regs[3]) + return 0; + + /* ... that it isn't being used for a return value ... */ + if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) + return 0; + + /* ... or for a tail-call argument ... */ + if (sibling) + { + gcc_assert (CALL_P (sibling)); + + if (find_regno_fusage (sibling, USE, 3)) + return 0; + } + + /* ... and that there are no call-saved registers in r0-r2 + (always true in the default ABI). */ + if (saved_int_regs & 0x7) + return 0; + } + + /* Can't be done if interworking with Thumb, and any registers have been + stacked. */ + if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type)) + return 0; + + /* On StrongARM, conditional returns are expensive if they aren't + taken and multiple registers have been stacked. */ + if (iscond && arm_tune_strongarm) + { + /* Conditional return when just the LR is stored is a simple + conditional-load instruction, that's not expensive. */ + if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM)) + return 0; + + if (flag_pic + && arm_pic_register != INVALID_REGNUM + && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + return 0; + } + + /* If there are saved registers but the LR isn't saved, then we need + two instructions for the return. */ + if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) + return 0; + + /* Can't be done if any of the VFP regs are pushed, + since this also requires an insn. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + return 0; + + if (TARGET_REALLY_IWMMXT) + for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + return 0; + + return 1; +} + +/* Return TRUE if we should try to use a simple_return insn, i.e. perform + shrink-wrapping if possible. This is the case if we need to emit a + prologue, which we can test by looking at the offsets. */ +bool +use_simple_return_p (void) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + return offsets->outgoing_args != 0; +} + +/* Return TRUE if int I is a valid immediate ARM constant. */ + +int +const_ok_for_arm (HOST_WIDE_INT i) +{ + int lowbit; + + /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must + be all zero, or all one. */ + if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0 + && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) + != ((~(unsigned HOST_WIDE_INT) 0) + & ~(unsigned HOST_WIDE_INT) 0xffffffff))) + return FALSE; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + /* Fast return for 0 and small values. We must do this for zero, since + the code below can't handle that one case. 
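+     Beyond that fast path, a few worked examples of what the checks
+     below decide (constants chosen only for illustration):
+       0x000003fc  valid in ARM mode   (0xff shifted left by 2)
+       0xf000000f  valid in ARM mode   (0xff rotated right by 4)
+       0x00000101  not valid           (the set bits span nine bits)
+     and for the Thumb-2 replicated forms in the else arm, 0x00ff00ff
+     and 0xab00ab00 are accepted while 0x00ff0102 is not.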
*/ + if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0) + return TRUE; + + /* Get the number of trailing zeros. */ + lowbit = ffs((int) i) - 1; + + /* Only even shifts are allowed in ARM mode so round down to the + nearest even number. */ + if (TARGET_ARM) + lowbit &= ~1; + + if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0) + return TRUE; + + if (TARGET_ARM) + { + /* Allow rotated constants in ARM mode. */ + if (lowbit <= 4 + && ((i & ~0xc000003f) == 0 + || (i & ~0xf000000f) == 0 + || (i & ~0xfc000003) == 0)) + return TRUE; + } + else + { + HOST_WIDE_INT v; + + /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */ + v = i & 0xff; + v |= v << 16; + if (i == v || i == (v | (v << 8))) + return TRUE; + + /* Allow repeated pattern 0xXY00XY00. */ + v = i & 0xff00; + v |= v << 16; + if (i == v) + return TRUE; + } + + return FALSE; +} + +/* Return true if I is a valid constant for the operation CODE. */ +int +const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) +{ + if (const_ok_for_arm (i)) + return 1; + + switch (code) + { + case SET: + /* See if we can use movw. */ + if (arm_arch_thumb2 && (i & 0xffff0000) == 0) + return 1; + else + /* Otherwise, try mvn. */ + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + + case PLUS: + /* See if we can use addw or subw. */ + if (TARGET_THUMB2 + && ((i & 0xfffff000) == 0 + || ((-i) & 0xfffff000) == 0)) + return 1; + /* else fall through. */ + + case COMPARE: + case EQ: + case NE: + case GT: + case LE: + case LT: + case GE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); + + case MINUS: /* Should only occur with (MINUS I reg) => rsb */ + case XOR: + return 0; + + case IOR: + if (TARGET_THUMB2) + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + return 0; + + case AND: + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + + default: + gcc_unreachable (); + } +} + +/* Return true if I is a valid di mode constant for the operation CODE. */ +int +const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) +{ + HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF; + HOST_WIDE_INT lo_val = i & 0xFFFFFFFF; + rtx hi = GEN_INT (hi_val); + rtx lo = GEN_INT (lo_val); + + if (TARGET_THUMB1) + return 0; + + switch (code) + { + case AND: + case IOR: + case XOR: + return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) + && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); + case PLUS: + return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode); + + default: + return 0; + } +} + +/* Emit a sequence of insns to handle a large constant. + CODE is the code of the operation required, it can be any of SET, PLUS, + IOR, AND, XOR, MINUS; + MODE is the mode in which the operation is being performed; + VAL is the integer to operate on; + SOURCE is the other operand (a register, or a null-pointer for SET); + SUBTARGETS means it is safe to create scratch registers if that will + either produce a simpler sequence, or we will want to cse the values. + Return value is the number of insns emitted. */ + +/* ??? Tweak this for thumb2. 
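+     As a small illustration of the SET path (the register name is a
+     placeholder): when the target has MOVW/MOVT (TARGET_USE_MOVT), an
+     expensive constant such as 0x12345678 is emitted by
+     arm_emit_movpair as
+
+       movw  rd, #0x5678
+       movt  rd, #0x1234
+
+     otherwise arm_gen_constant below is asked to synthesize it from
+     the ordinary immediate forms.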
*/ +int +arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets) +{ + rtx cond; + + if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC) + cond = COND_EXEC_TEST (PATTERN (insn)); + else + cond = NULL_RTX; + + if (subtargets || code == SET + || (REG_P (target) && REG_P (source) + && REGNO (target) != REGNO (source))) + { + /* After arm_reorg has been called, we can't fix up expensive + constants by pushing them into memory so we must synthesize + them in-line, regardless of the cost. This is only likely to + be more costly on chips that have load delay slots and we are + compiling without running the scheduler (so no splitting + occurred before the final instruction emission). + + Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c + */ + if (!after_arm_reorg + && !cond + && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, + 1, 0) + > (arm_constant_limit (optimize_function_for_size_p (cfun)) + + (code != SET)))) + { + if (code == SET) + { + /* Currently SET is the only monadic value for CODE, all + the rest are diadic. */ + if (TARGET_USE_MOVT) + arm_emit_movpair (target, GEN_INT (val)); + else + emit_set_insn (target, GEN_INT (val)); + + return 1; + } + else + { + rtx temp = subtargets ? gen_reg_rtx (mode) : target; + + if (TARGET_USE_MOVT) + arm_emit_movpair (temp, GEN_INT (val)); + else + emit_set_insn (temp, GEN_INT (val)); + + /* For MINUS, the value is subtracted from, since we never + have subtraction of a constant. */ + if (code == MINUS) + emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); + else + emit_set_insn (target, + gen_rtx_fmt_ee (code, mode, source, temp)); + return 2; + } + } + } + + return arm_gen_constant (code, mode, cond, val, target, source, subtargets, + 1); +} + +/* Return a sequence of integers, in RETURN_SEQUENCE that fit into + ARM/THUMB2 immediates, and add up to VAL. + Thr function return value gives the number of insns required. */ +static int +optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence) +{ + int best_consecutive_zeros = 0; + int i; + int best_start = 0; + int insns1, insns2; + struct four_ints tmp_sequence; + + /* If we aren't targeting ARM, the best place to start is always at + the bottom, otherwise look more closely. */ + if (TARGET_ARM) + { + for (i = 0; i < 32; i += 2) + { + int consecutive_zeros = 0; + + if (!(val & (3 << i))) + { + while ((i < 32) && !(val & (3 << i))) + { + consecutive_zeros += 2; + i += 2; + } + if (consecutive_zeros > best_consecutive_zeros) + { + best_consecutive_zeros = consecutive_zeros; + best_start = i - consecutive_zeros; + } + i -= 2; + } + } + } + + /* So long as it won't require any more insns to do so, it's + desirable to emit a small constant (in bits 0...9) in the last + insn. This way there is more chance that it can be combined with + a later addressing insn to form a pre-indexed load or store + operation. Consider: + + *((volatile int *)0xe0000100) = 1; + *((volatile int *)0xe0000110) = 2; + + We want this to wind up as: + + mov rA, #0xe0000000 + mov rB, #1 + str rB, [rA, #0x100] + mov rB, #2 + str rB, [rA, #0x110] + + rather than having to synthesize both large constants from scratch. + + Therefore, we calculate how many insns would be required to emit + the constant starting from `best_start', and also starting from + zero (i.e. with bit 31 first to be output). If `best_start' doesn't + yield a shorter sequence, we may as well use zero. 
*/ + insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); + if (best_start != 0 + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) + { + insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); + if (insns2 <= insns1) + { + *return_sequence = tmp_sequence; + insns1 = insns2; + } + } + + return insns1; +} + +/* As for optimal_immediate_sequence, but starting at bit-position I. */ +static int +optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence, int i) +{ + int remainder = val & 0xffffffff; + int insns = 0; + + /* Try and find a way of doing the job in either two or three + instructions. + + In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned + location. We start at position I. This may be the MSB, or + optimial_immediate_sequence may have positioned it at the largest block + of zeros that are aligned on a 2-bit boundary. We then fill up the temps, + wrapping around to the top of the word when we drop off the bottom. + In the worst case this code should produce no more than four insns. + + In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit + constants, shifted to any arbitrary location. We should always start + at the MSB. */ + do + { + int end; + unsigned int b1, b2, b3, b4; + unsigned HOST_WIDE_INT result; + int loc; + + gcc_assert (insns < 4); + + if (i <= 0) + i += 32; + + /* First, find the next normal 12/8-bit shifted/rotated immediate. */ + if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) + { + loc = i; + if (i <= 12 && TARGET_THUMB2 && code == PLUS) + /* We can use addw/subw for the last 12 bits. */ + result = remainder; + else + { + /* Use an 8-bit shifted/rotated immediate. */ + end = i - 8; + if (end < 0) + end += 32; + result = remainder & ((0x0ff << end) + | ((i < end) ? (0xff >> (32 - end)) + : 0)); + i -= 8; + } + } + else + { + /* Arm allows rotates by a multiple of two. Thumb-2 allows + arbitrary shifts. */ + i -= TARGET_ARM ? 2 : 1; + continue; + } + + /* Next, see if we can do a better job with a thumb2 replicated + constant. + + We do it this way around to catch the cases like 0x01F001E0 where + two 8-bit immediates would work, but a replicated constant would + make it worse. + + TODO: 16-bit constants that don't clear all the bits, but still win. + TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ + if (TARGET_THUMB2) + { + b1 = (remainder & 0xff000000) >> 24; + b2 = (remainder & 0x00ff0000) >> 16; + b3 = (remainder & 0x0000ff00) >> 8; + b4 = remainder & 0xff; + + if (loc > 24) + { + /* The 8-bit immediate already found clears b1 (and maybe b2), + but must leave b3 and b4 alone. */ + + /* First try to find a 32-bit replicated constant that clears + almost everything. We can assume that we can't do it in one, + or else we wouldn't be here. */ + unsigned int tmp = b1 & b2 & b3 & b4; + unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) + + (tmp << 24); + unsigned int matching_bytes = (tmp == b1) + (tmp == b2) + + (tmp == b3) + (tmp == b4); + if (tmp + && (matching_bytes >= 3 + || (matching_bytes == 2 + && const_ok_for_op (remainder & ~tmp2, code)))) + { + /* At least 3 of the bytes match, and the fourth has at + least as many bits set, or two of the bytes match + and it will only require one more insn to finish. */ + result = tmp2; + i = tmp != b1 ? 32 + : tmp != b2 ? 24 + : tmp != b3 ? 16 + : 8; + } + + /* Second, try to find a 16-bit replicated constant that can + leave three of the bytes clear. 
If b2 or b4 is already + zero, then we can. If the 8-bit from above would not + clear b2 anyway, then we still win. */ + else if (b1 == b3 && (!b2 || !b4 + || (remainder & 0x00ff0000 & ~result))) + { + result = remainder & 0xff00ff00; + i = 24; + } + } + else if (loc > 16) + { + /* The 8-bit immediate already found clears b2 (and maybe b3) + and we don't get here unless b1 is alredy clear, but it will + leave b4 unchanged. */ + + /* If we can clear b2 and b4 at once, then we win, since the + 8-bits couldn't possibly reach that far. */ + if (b2 == b4) + { + result = remainder & 0x00ff00ff; + i = 16; + } + } + } + + return_sequence->i[insns++] = result; + remainder &= ~result; + + if (code == SET || code == MINUS) + code = PLUS; + } + while (remainder); + + return insns; +} + +/* Emit an instruction with the indicated PATTERN. If COND is + non-NULL, conditionalize the execution of the instruction on COND + being true. */ + +static void +emit_constant_insn (rtx cond, rtx pattern) +{ + if (cond) + pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern); + emit_insn (pattern); +} + +/* As above, but extra parameter GENERATE which, if clear, suppresses + RTL generation. */ + +static int +arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets, + int generate) +{ + int can_invert = 0; + int can_negate = 0; + int final_invert = 0; + int i; + int set_sign_bit_copies = 0; + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + int set_zero_bit_copies = 0; + int insns = 0, neg_insns, inv_insns; + unsigned HOST_WIDE_INT temp1, temp2; + unsigned HOST_WIDE_INT remainder = val & 0xffffffff; + struct four_ints *immediates; + struct four_ints pos_immediates, neg_immediates, inv_immediates; + + /* Find out which operations are safe for a given CODE. Also do a quick + check for degenerate cases; these can occur when DImode operations + are split. */ + switch (code) + { + case SET: + can_invert = 1; + break; + + case PLUS: + can_negate = 1; + break; + + case IOR: + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + GEN_INT (ARM_SIGN_EXTEND (val)))); + return 1; + } + + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + break; + + case AND: + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, const0_rtx)); + return 1; + } + if (remainder == 0xffffffff) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + can_invert = 1; + break; + + case XOR: + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + return 1; + } + final_invert = 1; + break; + + case MINUS: + /* We treat MINUS as (val - source), since (source - val) is always + passed as (source + (-val)). 
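+     So for C source such as  x = 42 - y;  the operation reaches this
+     function as (MINUS 42 y), and when the constant is a valid
+     immediate the single-insn case below emits it as a reverse
+     subtract (rsb) of the register from #42.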
*/ + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NEG (mode, source))); + return 1; + } + if (const_ok_for_arm (val)) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_MINUS (mode, GEN_INT (val), + source))); + return 1; + } + + break; + + default: + gcc_unreachable (); + } + + /* If we can do it in one insn get out quickly. */ + if (const_ok_for_op (val, code)) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + (source + ? gen_rtx_fmt_ee (code, mode, source, + GEN_INT (val)) + : GEN_INT (val)))); + return 1; + } + + /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single + insn. */ + if (code == AND && (i = exact_log2 (remainder + 1)) > 0 + && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode))) + { + if (generate) + { + if (mode == SImode && i == 16) + /* Use UXTH in preference to UBFX, since on Thumb2 it's a + smaller insn. */ + emit_constant_insn (cond, + gen_zero_extendhisi2 + (target, gen_lowpart (HImode, source))); + else + /* Extz only supports SImode, but we can coerce the operands + into that mode. */ + emit_constant_insn (cond, + gen_extzv_t2 (gen_lowpart (SImode, target), + gen_lowpart (SImode, source), + GEN_INT (i), const0_rtx)); + } + + return 1; + } + + /* Calculate a few attributes that may be useful for specific + optimizations. */ + /* Count number of leading zeros. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) == 0) + clear_sign_bit_copies++; + else + break; + } + + /* Count number of leading 1's. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) != 0) + set_sign_bit_copies++; + else + break; + } + + /* Count number of trailing zero's. */ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) == 0) + clear_zero_bit_copies++; + else + break; + } + + /* Count number of trailing 1's. */ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) != 0) + set_zero_bit_copies++; + else + break; + } + + switch (code) + { + case SET: + /* See if we can do this by sign_extending a constant that is known + to be negative. This is a good, way of doing it, since the shift + may well merge into a subsequent insn. */ + if (set_sign_bit_copies > 1) + { + if (const_ok_for_arm + (temp1 = ARM_SIGN_EXTEND (remainder + << (set_sign_bit_copies - 1)))) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + /* For an inverted constant, we will need to set the low bits, + these will be shifted out of harm's way. */ + temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; + if (const_ok_for_arm (~temp1)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + } + + /* See if we can calculate the value as the difference between two + valid immediates. 
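+     A worked example, with the constant chosen purely for
+     illustration: for remainder == 0x00ffff00 we get topshift == 8,
+     temp1 == 0x01000000 (via the overflow fixup below) and
+     temp2 == 0x100, both valid immediates, so the value is built in
+     two insns: a move of 0x01000000 followed by an add of -0x100
+     (which assembles as a subtract of 0x100).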
*/ + if (clear_sign_bit_copies + clear_zero_bit_copies <= 16) + { + int topshift = clear_sign_bit_copies & ~1; + + temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift)) + & (0xff000000 >> topshift)); + + /* If temp1 is zero, then that means the 9 most significant + bits of remainder were 1 and we've caused it to overflow. + When topshift is 0 we don't need to do anything since we + can borrow from 'bit 32'. */ + if (temp1 == 0 && topshift != 0) + temp1 = 0x80000000 >> (topshift - 1); + + temp2 = ARM_SIGN_EXTEND (temp1 - remainder); + + if (const_ok_for_arm (temp2)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_addsi3 (target, new_src, + GEN_INT (-temp2))); + } + + return 2; + } + } + + /* See if we can generate this by setting the bottom (or the top) + 16 bits, and then shifting these into the other half of the + word. We only look for the simplest cases, to do more would cost + too much. Be careful, however, not to generate this when the + alternative would take fewer insns. */ + if (val & 0xffff0000) + { + temp1 = remainder & 0xffff0000; + temp2 = remainder & 0x0000ffff; + + /* Overlaps outside this range are best done using other methods. */ + for (i = 9; i < 24; i++) + { + if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder) + && !const_ok_for_arm (temp2)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp2, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET + (VOIDmode, target, + gen_rtx_IOR (mode, + gen_rtx_ASHIFT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + + /* Don't duplicate cases already considered. */ + for (i = 17; i < 24; i++) + { + if (((temp1 | (temp1 >> i)) == remainder) + && !const_ok_for_arm (temp1)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp1, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_IOR + (mode, + gen_rtx_LSHIFTRT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + } + break; + + case IOR: + case XOR: + /* If we have IOR or XOR, and the constant can be loaded in a + single instruction, and we can find a temporary to put it in, + then this can be done in two instructions instead of 3-4. */ + if (subtargets + /* TARGET can't be NULL if SUBTARGETS is 0 */ + || (reload_completed && !reg_mentioned_p (target, source))) + { + if (const_ok_for_arm (ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + GEN_INT (val))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_fmt_ee (code, mode, + source, sub))); + } + return 2; + } + } + + if (code == XOR) + break; + + /* Convert. + x = y | constant ( which is composed of set_sign_bit_copies of leading 1s + and the remainder 0s for e.g. 0xfff00000) + x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies) + + This can be done in 2 instructions by using shifts with mov or mvn. + e.g. for + x = x | 0xfff00000; + we generate. 
+ mvn r0, r0, asl #12 + mvn r0, r0, lsr #12 */ + if (set_sign_bit_copies > 8 + && (val & (-1 << (32 - set_sign_bit_copies))) == val) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_sign_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, sub, + shift)))); + } + return 2; + } + + /* Convert + x = y | constant (which has set_zero_bit_copies number of trailing ones). + to + x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). + + For eg. r0 = r0 | 0xfff + mvn r0, r0, lsr #12 + mvn r0, r0, asl #12 + + */ + if (set_zero_bit_copies > 8 + && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_zero_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, sub, + shift)))); + } + return 2; + } + + /* This will never be reached for Thumb2 because orn is a valid + instruction. This is for Thumb1 and the ARM 32 bit cases. + + x = y | constant (such that ~constant is a valid constant) + Transform this to + x = ~(~y & ~constant). + */ + if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, source))); + source = sub; + if (subtargets) + sub = gen_reg_rtx (mode); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_AND (mode, source, + GEN_INT (temp1)))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, sub))); + } + return 3; + } + break; + + case AND: + /* See if two shifts will do 2 or more insn's worth of work. */ + if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = ((0xffffffff + << (32 - clear_sign_bit_copies)) + & 0xffffffff); + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? NULL_RTX : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_sign_bit_copies); + + emit_insn (gen_ashlsi3 (new_src, source, shift)); + emit_insn (gen_lshrsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1; + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? 
NULL_RTX : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_zero_bit_copies); + + emit_insn (gen_lshrsi3 (new_src, source, shift)); + emit_insn (gen_ashlsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + break; + + default: + break; + } + + /* Calculate what the instruction sequences would be if we generated it + normally, negated, or inverted. */ + if (code == AND) + /* AND cannot be split into multiple insns, so invert and use BIC. */ + insns = 99; + else + insns = optimal_immediate_sequence (code, remainder, &pos_immediates); + + if (can_negate) + neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, + &neg_immediates); + else + neg_insns = 99; + + if (can_invert || final_invert) + inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, + &inv_immediates); + else + inv_insns = 99; + + immediates = &pos_immediates; + + /* Is the negated immediate sequence more efficient? */ + if (neg_insns < insns && neg_insns <= inv_insns) + { + insns = neg_insns; + immediates = &neg_immediates; + } + else + can_negate = 0; + + /* Is the inverted immediate sequence more efficient? + We must allow for an extra NOT instruction for XOR operations, although + there is some chance that the final 'mvn' will get optimized later. */ + if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) + { + insns = inv_insns; + immediates = &inv_immediates; + } + else + { + can_invert = 0; + final_invert = 0; + } + + /* Now output the chosen sequence as instructions. */ + if (generate) + { + for (i = 0; i < insns; i++) + { + rtx new_src, temp1_rtx; + + temp1 = immediates->i[i]; + + if (code == SET || code == MINUS) + new_src = (subtargets ? gen_reg_rtx (mode) : target); + else if ((final_invert || i < (insns - 1)) && subtargets) + new_src = gen_reg_rtx (mode); + else + new_src = target; + + if (can_invert) + temp1 = ~temp1; + else if (can_negate) + temp1 = -temp1; + + temp1 = trunc_int_for_mode (temp1, mode); + temp1_rtx = GEN_INT (temp1); + + if (code == SET) + ; + else if (code == MINUS) + temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); + else + temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + temp1_rtx)); + source = new_src; + + if (code == SET) + { + can_negate = can_invert; + can_invert = 0; + code = PLUS; + } + else if (code == MINUS) + code = PLUS; + } + } + + if (final_invert) + { + if (generate) + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + insns++; + } + + return insns; +} + +/* Canonicalize a comparison so that we are more likely to recognize it. + This can be done for a few constant compares, where we can make the + immediate value easier to load. */ + +static void +arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + enum machine_mode mode; + unsigned HOST_WIDE_INT i, maxval; + + mode = GET_MODE (*op0); + if (mode == VOIDmode) + mode = GET_MODE (*op1); + + maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; + + /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode + we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either + reversed or (for constant OP1) adjusted to GE/LT. Similarly + for GTU/LEU in Thumb mode. 
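+     For example, a DImode (GT x 9) is rewritten below as (GE x 10)
+     when the constant 10 is usable by arm_const_double_by_immediates,
+     and (LE x 9) likewise becomes (LT x 10); if the constant cannot
+     be adjusted, the operands are swapped and the comparison code
+     swapped to match (unless OP0 must keep its value).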
*/ + if (mode == DImode) + { + rtx tem; + + if (*code == GT || *code == LE + || (!TARGET_ARM && (*code == GTU || *code == LEU))) + { + /* Missing comparison. First try to use an available + comparison. */ + if (CONST_INT_P (*op1)) + { + i = INTVAL (*op1); + switch (*code) + { + case GT: + case LE: + if (i != maxval + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GT ? GE : LT; + return; + } + break; + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GTU ? GEU : LTU; + return; + } + break; + default: + gcc_unreachable (); + } + } + + /* If that did not work, reverse the condition. */ + if (!op0_preserve_value) + { + tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } + } + return; + } + + /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing + with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)), + to facilitate possible combining with a cmp into 'ands'. */ + if (mode == SImode + && GET_CODE (*op0) == ZERO_EXTEND + && GET_CODE (XEXP (*op0, 0)) == SUBREG + && GET_MODE (XEXP (*op0, 0)) == QImode + && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode + && subreg_lowpart_p (XEXP (*op0, 0)) + && *op1 == const0_rtx) + *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)), + GEN_INT (255)); + + /* Comparisons smaller than DImode. Only adjust comparisons against + an out-of-range constant. */ + if (!CONST_INT_P (*op1) + || const_ok_for_arm (INTVAL (*op1)) + || const_ok_for_arm (- INTVAL (*op1))) + return; + + i = INTVAL (*op1); + + switch (*code) + { + case EQ: + case NE: + return; + + case GT: + case LE: + if (i != maxval + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GT ? GE : LT; + return; + } + break; + + case GE: + case LT: + if (i != ~maxval + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + *code = *code == GE ? GT : LE; + return; + } + break; + + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GTU ? GEU : LTU; + return; + } + break; + + case GEU: + case LTU: + if (i != 0 + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + *code = *code == GEU ? GTU : LEU; + return; + } + break; + + default: + gcc_unreachable (); + } +} + + +/* Define how to find the value returned by a function. */ + +static rtx +arm_function_value(const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp ATTRIBUTE_UNUSED; + rtx r ATTRIBUTE_UNUSED; + + mode = TYPE_MODE (type); + + if (TARGET_AAPCS_BASED) + return aapcs_allocate_return_reg (mode, type, func); + + /* Promote integer types. */ + if (INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return arm_libcall_value_1 (mode); +} + +/* libcall hashtable helpers. 
*/ + +struct libcall_hasher : typed_noop_remove +{ + typedef rtx_def value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline bool +libcall_hasher::equal (const value_type *p1, const compare_type *p2) +{ + return rtx_equal_p (p1, p2); +} + +inline hashval_t +libcall_hasher::hash (const value_type *p1) +{ + return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE); +} + +typedef hash_table libcall_table_type; + +static void +add_libcall (libcall_table_type htab, rtx libcall) +{ + *htab.find_slot (libcall, INSERT) = libcall; +} + +static bool +arm_libcall_uses_aapcs_base (const_rtx libcall) +{ + static bool init_done = false; + static libcall_table_type libcall_htab; + + if (!init_done) + { + init_done = true; + + libcall_htab.create (31); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (sext_optab, SFmode, HFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (trunc_optab, HFmode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, SImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, SImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, SFmode)); + + /* Values from double-precision helper functions are returned in core + registers if the selected core only supports single-precision + arithmetic, even if we are using the hard-float ABI. The same is + true for single-precision helpers, but we will never be using the + hard-float ABI on a CPU which doesn't support single-precision + operations in hardware. 
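+     For example (the FPU name is given only as an illustration), on a
+     core with a single-precision-only FPU such as -mfpu=fpv4-sp-d16, a
+     DFmode addition compiled with -mfloat-abi=hard still goes through
+     a library helper, and that helper hands its result back in the
+     core registers r0/r1; arm_libcall_value below therefore has to
+     treat these entries as following the base PCS.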
*/ + add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, + SFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, + DFmode)); + } + + return libcall && libcall_htab.find (libcall) != NULL; +} + +static rtx +arm_libcall_value_1 (enum machine_mode mode) +{ + if (TARGET_AAPCS_BASED) + return aapcs_libcall_value (mode); + else if (TARGET_IWMMXT_ABI + && arm_vector_mode_supported_p (mode)) + return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM); + else + return gen_rtx_REG (mode, ARG_REGISTER (1)); +} + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +static rtx +arm_libcall_value (enum machine_mode mode, const_rtx libcall) +{ + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + /* The following libcalls return their result in integer registers, + even though they return a floating point value. */ + if (arm_libcall_uses_aapcs_base (libcall)) + return gen_rtx_REG (mode, ARG_REGISTER(1)); + + } + + return arm_libcall_value_1 (mode); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +arm_function_value_regno_p (const unsigned int regno) +{ + if (regno == ARG_REGISTER (1) + || (TARGET_32BIT + && TARGET_AAPCS_BASED + && TARGET_VFP + && TARGET_HARD_FLOAT + && regno == FIRST_VFP_REGNUM) + || (TARGET_IWMMXT_ABI + && regno == FIRST_IWMMXT_REGNUM)) + return true; + + return false; +} + +/* Determine the amount of memory needed to store the possible return + registers of an untyped call. */ +int +arm_apply_result_size (void) +{ + int size = 16; + + if (TARGET_32BIT) + { + if (TARGET_HARD_FLOAT_ABI && TARGET_VFP) + size += 32; + if (TARGET_IWMMXT_ABI) + size += 8; + } + + return size; +} + +/* Decide whether TYPE should be returned in memory (true) + or in a register (false). FNTYPE is the type of the function making + the call. */ +static bool +arm_return_in_memory (const_tree type, const_tree fntype) +{ + HOST_WIDE_INT size; + + size = int_size_in_bytes (type); /* Negative if not fixed size. */ + + if (TARGET_AAPCS_BASED) + { + /* Simple, non-aggregate types (ie not including vectors and + complex) are always returned in a register (or registers). + We don't care about which register here, so we can short-cut + some of the detail. */ + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != VECTOR_TYPE + && TREE_CODE (type) != COMPLEX_TYPE) + return false; + + /* Any return value that is no larger than one word can be + returned in r0. */ + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) + return false; + + /* Check any available co-processors to see if they accept the + type as a register candidate (VFP, for example, can return + some aggregates in consecutive registers). These aren't + available if the call is variadic. 
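+     For example (the type is shown only as an illustration), under the
+     VFP variant of the AAPCS a homogeneous aggregate such as
+
+       struct vec4 { float x, y, z, w; };
+
+     is accepted by aapcs_select_return_coproc and returned in s0-s3,
+     so we return false here even though it is larger than a word.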
*/ + if (aapcs_select_return_coproc (type, fntype) >= 0) + return false; + + /* Vector values should be returned using ARM registers, not + memory (unless they're over 16 bytes, which will break since + we only have four call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + /* The rest go in memory. */ + return true; + } + + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + if (!AGGREGATE_TYPE_P (type) && + (TREE_CODE (type) != VECTOR_TYPE)) + /* All simple types are returned in registers. */ + return false; + + if (arm_abi != ARM_ABI_APCS) + { + /* ATPCS and later return aggregate types in memory only if they are + larger than a word (or are variable size). */ + return (size < 0 || size > UNITS_PER_WORD); + } + + /* For the arm-wince targets we choose to be compatible with Microsoft's + ARM and Thumb compilers, which always return aggregates in memory. */ +#ifndef ARM_WINCE + /* All structures/unions bigger than one word are returned in memory. + Also catch the case where int_size_in_bytes returns -1. In this case + the aggregate is either huge or of variable size, and in either case + we will want to return it via memory and not in a register. */ + if (size < 0 || size > UNITS_PER_WORD) + return true; + + if (TREE_CODE (type) == RECORD_TYPE) + { + tree field; + + /* For a struct the APCS says that we only return in a register + if the type is 'integer like' and every addressable element + has an offset of zero. For practical purposes this means + that the structure can have at most one non bit-field element + and that this element must be the first one in the structure. */ + + /* Find the first field, ignoring non FIELD_DECL things which will + have been created by C++. */ + for (field = TYPE_FIELDS (type); + field && TREE_CODE (field) != FIELD_DECL; + field = DECL_CHAIN (field)) + continue; + + if (field == NULL) + return false; /* An empty structure. Allowed by an extension to ANSI C. */ + + /* Check that the first field is valid for returning in a register. */ + + /* ... Floats are not allowed */ + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + /* ... Aggregates that are not themselves valid for returning in + a register are not allowed. */ + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + + /* Now check the remaining fields, if any. Only bitfields are allowed, + since they are not addressable. */ + for (field = DECL_CHAIN (field); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (!DECL_BIT_FIELD_TYPE (field)) + return true; + } + + return false; + } + + if (TREE_CODE (type) == UNION_TYPE) + { + tree field; + + /* Unions can be returned in registers if every element is + integral, or can be returned in an integer register. */ + for (field = TYPE_FIELDS (type); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + } + + return false; + } +#endif /* not ARM_WINCE */ + + /* Return all other types in memory. */ + return true; +} + +const struct pcs_attribute_arg +{ + const char *arg; + enum arm_pcs value; +} pcs_attribute_args[] = + { + {"aapcs", ARM_PCS_AAPCS}, + {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, +#if 0 + /* We could recognize these, but changes would be needed elsewhere + * to implement them. 
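The APCS record rule spelled out above reduces to: once a struct already fits in a word, it comes back in a register only if its first real field is neither a float nor an aggregate that itself needs memory, and every later field is a bit-field. A simplified host-side model over an invented field descriptor (assuming the descriptor array already contains only genuine fields):

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for one struct member, for illustration only.  */
struct field_desc
{
  bool is_float;
  bool is_bit_field;
  bool subfield_needs_memory;   /* would itself be returned in memory */
};

static bool
apcs_struct_returned_in_memory (const struct field_desc *fields, size_t n)
{
  if (n == 0)
    return false;               /* empty struct: register return */

  if (fields[0].is_float || fields[0].subfield_needs_memory)
    return true;

  for (size_t i = 1; i < n; i++)
    if (!fields[i].is_bit_field)
      return true;              /* only bit-fields may follow */

  return false;
}

Unions follow the same loop, except that every member must pass the register test.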
*/ + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, + {"atpcs", ARM_PCS_ATPCS}, + {"apcs", ARM_PCS_APCS}, +#endif + {NULL, ARM_PCS_UNKNOWN} + }; + +static enum arm_pcs +arm_pcs_from_attribute (tree attr) +{ + const struct pcs_attribute_arg *ptr; + const char *arg; + + /* Get the value of the argument. */ + if (TREE_VALUE (attr) == NULL_TREE + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) + return ARM_PCS_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (attr)); + + /* Check it against the list of known arguments. */ + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->value; + + /* An unrecognized interrupt type. */ + return ARM_PCS_UNKNOWN; +} + +/* Get the PCS variant to use for this call. TYPE is the function's type + specification, DECL is the specific declartion. DECL may be null if + the call could be indirect or if this is a library call. */ +static enum arm_pcs +arm_get_pcs_model (const_tree type, const_tree decl) +{ + bool user_convention = false; + enum arm_pcs user_pcs = arm_pcs_default; + tree attr; + + gcc_assert (type); + + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); + if (attr) + { + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); + user_convention = true; + } + + if (TARGET_AAPCS_BASED) + { + /* Detect varargs functions. These always use the base rules + (no argument is ever a candidate for a co-processor + register). */ + bool base_rules = stdarg_p (type); + + if (user_convention) + { + if (user_pcs > ARM_PCS_AAPCS_LOCAL) + sorry ("non-AAPCS derived PCS variant"); + else if (base_rules && user_pcs != ARM_PCS_AAPCS) + error ("variadic functions must use the base AAPCS variant"); + } + + if (base_rules) + return ARM_PCS_AAPCS; + else if (user_convention) + return user_pcs; + else if (decl && flag_unit_at_a_time) + { + /* Local functions never leak outside this compilation unit, + so we are free to use whatever conventions are + appropriate. */ + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); + if (i && i->local) + return ARM_PCS_AAPCS_LOCAL; + } + } + else if (user_convention && user_pcs != arm_pcs_default) + sorry ("PCS variant"); + + /* For everything else we use the target's default. */ + return arm_pcs_default; +} + + +static void +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libcall ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED) +{ + /* Record the unallocated VFP registers. */ + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; + pcum->aapcs_vfp_reg_alloc = 0; +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree. 
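The "pcs" attribute parsing above is a straight scan of a NULL-terminated name/value table. The same shape as a stand-alone sketch, trimmed to the two spellings the patch actually accepts:

#include <string.h>

enum pcs_kind { PCS_AAPCS, PCS_AAPCS_VFP, PCS_UNKNOWN };

static const struct { const char *arg; enum pcs_kind value; } pcs_args[] = {
  { "aapcs",     PCS_AAPCS },
  { "aapcs-vfp", PCS_AAPCS_VFP },
  { NULL,        PCS_UNKNOWN }
};

/* Walk the table and fall back to "unknown" for anything unrecognised.  */
static enum pcs_kind
pcs_from_string (const char *arg)
{
  for (size_t i = 0; pcs_args[i].arg != NULL; i++)
    if (strcmp (arg, pcs_args[i].arg) == 0)
      return pcs_args[i].value;
  return PCS_UNKNOWN;
}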
*/ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types, whether or not those modes are + supported with the present options. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if PCS_VARIANT should use VFP registers. 
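aapcs_vfp_sub_candidate above is the homogeneous-aggregate walk: every leaf must be a floating-point element of one common mode, and the caller later accepts a total of 1 to 4 elements. A much-simplified sketch over an invented type descriptor, covering only scalars, arrays and records (no complex, vector, union or padding handling):

#include <stddef.h>

enum elem_kind { ELEM_NONE, ELEM_FLOAT, ELEM_DOUBLE };

struct hfa_type
{
  enum elem_kind scalar;        /* ELEM_NONE for aggregates */
  size_t nelts;                 /* number of elements / fields */
  const struct hfa_type *elts;  /* element / field types */
};

/* Return the element count, or -1 if the type is not a candidate.
   *KIND is set from the first scalar seen and must match everywhere.  */
static int
count_fp_elements (const struct hfa_type *t, enum elem_kind *kind)
{
  if (t->scalar != ELEM_NONE)
    {
      if (*kind == ELEM_NONE)
        *kind = t->scalar;
      return *kind == t->scalar ? 1 : -1;
    }

  int total = 0;
  for (size_t i = 0; i < t->nelts; i++)
    {
      int sub = count_fp_elements (&t->elts[i], kind);
      if (sub < 0)
        return -1;
      total += sub;
    }
  return total;
}

For example, a descriptor for struct { float x, y, z; } yields 3 with kind ELEM_FLOAT and is a candidate, while mixing float and double fields returns -1.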
*/ +static bool +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) +{ + if (pcs_variant == ARM_PCS_AAPCS_VFP) + { + static bool seen_thumb1_vfp = false; + + if (TARGET_THUMB1 && !seen_thumb1_vfp) + { + sorry ("Thumb-1 hard-float VFP ABI"); + /* sorry() is not immediately fatal, so only display this once. */ + seen_thumb1_vfp = true; + } + + return true; + } + + if (pcs_variant != ARM_PCS_AAPCS_LOCAL) + return false; + + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && + (TARGET_VFP_DOUBLE || !is_double)); +} + +/* Return true if an argument whose type is TYPE, or mode is MODE, is + suitable for passing or returning in VFP registers for the PCS + variant selected. If it is, then *BASE_MODE is updated to contain + a machine mode describing each element of the argument's type and + *COUNT to hold the number of such elements. */ +static bool +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, + enum machine_mode *base_mode, int *count) +{ + enum machine_mode new_mode = VOIDmode; + + /* If we have the type information, prefer that to working things + out from the mode. */ + if (type) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= 4) + *count = ag_count; + else + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + *count = 2; + new_mode = (mode == DCmode ? DFmode : SFmode); + } + else + return false; + + + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) + return false; + + *base_mode = new_mode; + return true; +} + +static bool +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type) +{ + int count ATTRIBUTE_UNUSED; + enum machine_mode ag_mode ATTRIBUTE_UNUSED; + + if (!use_vfp_abi (pcs_variant, false)) + return false; + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); +} + +static bool +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + if (!use_vfp_abi (pcum->pcs_variant, false)) + return false; + + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, + &pcum->aapcs_vfp_rmode, + &pcum->aapcs_vfp_rcount); +} + +static bool +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; + int regno; + + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) + { + pcum->aapcs_vfp_reg_alloc = mask << regno; + if (mode == BLKmode + || (mode == TImode && ! TARGET_NEON) + || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode)) + { + int i; + int rcount = pcum->aapcs_vfp_rcount; + int rshift = shift; + enum machine_mode rmode = pcum->aapcs_vfp_rmode; + rtx par; + if (!TARGET_NEON) + { + /* Avoid using unsupported vector modes. 
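The allocation loop this comment sits in scans the free-register bitmask for a naturally aligned, contiguous block of S registers. Pulled out as a stand-alone helper (assuming count <= 4, shift of 1 for SFmode or 2 for DFmode, and a free mask initialised to the low 16 bits; the freeing step that aapcs_vfp_advance performs later is folded in here for illustration):

static int
allocate_vfp_block (unsigned *free_mask, int shift, int count, int num_arg_regs)
{
  unsigned mask = (1u << (shift * count)) - 1;

  for (int regno = 0; regno < num_arg_regs; regno += shift)
    if (((*free_mask >> regno) & mask) == mask)
      {
        *free_mask &= ~(mask << regno);   /* mark the block as used */
        return regno;                     /* first S register of the block */
      }
  return -1;                              /* no contiguous block: use the stack */
}

With free_mask = 0xffff, a homogeneous aggregate of three floats (shift 1, count 3) takes s0-s2 and leaves free_mask = 0xfff8.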
*/ + if (rmode == V2SImode) + rmode = DImode; + else if (rmode == V4SImode) + { + rmode = DImode; + rcount *= 2; + rshift /= 2; + } + } + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); + for (i = 0; i < rcount; i++) + { + rtx tmp = gen_rtx_REG (rmode, + FIRST_VFP_REGNUM + regno + i * rshift); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (rmode))); + XVECEXP (par, 0, i) = tmp; + } + + pcum->aapcs_reg = par; + } + else + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); + return true; + } + return false; +} + +static rtx +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + if (!use_vfp_abi (pcs_variant, false)) + return NULL; + + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int count; + enum machine_mode ag_mode; + int i; + rtx par; + int shift; + + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); + + if (!TARGET_NEON) + { + if (ag_mode == V2SImode) + ag_mode = DImode; + else if (ag_mode == V4SImode) + { + ag_mode = DImode; + count *= 2; + } + } + shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + + return par; + } + + return gen_rtx_REG (mode, FIRST_VFP_REGNUM); +} + +static void +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED) +{ + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; + pcum->aapcs_vfp_reg_alloc = 0; + return; +} + +#define AAPCS_CP(X) \ + { \ + aapcs_ ## X ## _cum_init, \ + aapcs_ ## X ## _is_call_candidate, \ + aapcs_ ## X ## _allocate, \ + aapcs_ ## X ## _is_return_candidate, \ + aapcs_ ## X ## _allocate_return_reg, \ + aapcs_ ## X ## _advance \ + } + +/* Table of co-processors that can be used to pass arguments in + registers. Idealy no arugment should be a candidate for more than + one co-processor table entry, but the table is processed in order + and stops after the first match. If that entry then fails to put + the argument into a co-processor register, the argument will go on + the stack. */ +static struct +{ + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); + + /* Return true if an argument of mode MODE (or type TYPE if MODE is + BLKmode) is a candidate for this co-processor's registers; this + function should ignore any position-dependent state in + CUMULATIVE_ARGS and only use call-type dependent information. */ + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if the argument does get a co-processor register; it + should set aapcs_reg to an RTX of the register allocated as is + required for a return from FUNCTION_ARG. */ + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if a result of mode MODE (or type TYPE if MODE is + BLKmode) is can be returned in this co-processor's registers. 
*/ + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); + + /* Allocate and return an RTX element to hold the return type of a + call, this routine must not fail and will only be called if + is_return_candidate returned true with the same parameters. */ + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); + + /* Finish processing this argument and prepare to start processing + the next one. */ + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = + { + AAPCS_CP(vfp) + }; + +#undef AAPCS_CP + +static int +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) + return i; + + return -1; +} + +static int +aapcs_select_return_coproc (const_tree type, const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, + TYPE_MODE (type), + type)) + return i; + } + return -1; +} + +static rtx +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, + const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + int unsignedp ATTRIBUTE_UNUSED; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + /* Promote integer types. */ + if (type && INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, + type)) + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, + mode, type); + } + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (type && arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return gen_rtx_REG (mode, R0_REGNUM); +} + +static rtx +aapcs_libcall_value (enum machine_mode mode) +{ + if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode) + && GET_MODE_SIZE (mode) <= 4) + mode = SImode; + + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); +} + +/* Lay out a function argument using the AAPCS rules. The rule + numbers referred to here are those in the AAPCS. 
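One small detail in aapcs_allocate_return_reg above: for big-endian AAPCS, a small struct returned in a core register is widened to a whole number of words so the value lands in the most significant bits. The rounding, written out on its own as an illustrative helper:

static long
round_return_size_to_words (long size, int units_per_word)
{
  if (size % units_per_word != 0)
    size += units_per_word - size % units_per_word;
  return size;
}

/* round_return_size_to_words (3, 4) == 4, (5, 4) == 8, (8, 4) == 8.  */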
*/ +static void +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type, bool named) +{ + int nregs, nregs2; + int ncrn; + + /* We only need to do this once per argument. */ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + /* Special case: if named is false then we are handling an incoming + anonymous argument which is on the stack. */ + if (!named) + return; + + /* Is this a potential co-processor register candidate? */ + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int slot = aapcs_select_call_coproc (pcum, mode, type); + pcum->aapcs_cprc_slot = slot; + + /* We don't have to apply any of the rules from part B of the + preparation phase, these are handled elsewhere in the + compiler. */ + + if (slot >= 0) + { + /* A Co-processor register candidate goes either in its own + class of registers or on the stack. */ + if (!pcum->aapcs_cprc_failed[slot]) + { + /* C1.cp - Try to allocate the argument to co-processor + registers. */ + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) + return; + + /* C2.cp - Put the argument on the stack and note that we + can't assign any more candidates in this slot. We also + need to note that we have allocated stack space, so that + we won't later try to split a non-cprc candidate between + core registers and the stack. */ + pcum->aapcs_cprc_failed[slot] = true; + pcum->can_split = false; + } + + /* We didn't get a register, so this argument goes on the + stack. */ + gcc_assert (pcum->can_split == false); + return; + } + } + + /* C3 - For double-word aligned arguments, round the NCRN up to the + next even number. */ + ncrn = pcum->aapcs_ncrn; + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) + ncrn++; + + nregs = ARM_NUM_REGS2(mode, type); + + /* Sigh, this test should really assert that nregs > 0, but a GCC + extension allows empty structs and then gives them empty size; it + then allows such a structure to be passed by value. For some of + the code below we have to pretend that such an argument has + non-zero size so that we 'locate' it correctly either in + registers or on the stack. */ + gcc_assert (nregs >= 0); + + nregs2 = nregs ? nregs : 1; + + /* C4 - Argument fits entirely in core registers. */ + if (ncrn + nregs2 <= NUM_ARG_REGS) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = ncrn + nregs; + return; + } + + /* C5 - Some core registers left and there are no arguments already + on the stack: split this argument between the remaining core + registers and the stack. */ + if (ncrn < NUM_ARG_REGS && pcum->can_split) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; + return; + } + + /* C6 - NCRN is set to 4. */ + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + + /* C7,C8 - arugment goes on the stack. We have nothing to do here. */ + return; +} + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is NULL. */ +void +arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, + rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + /* Long call handling. 
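Rules C3-C5 implemented by aapcs_layout_arg above can be restated as a small host-side model: round the next core register number up for double-word aligned arguments, use registers if the whole argument fits, otherwise split it with the remainder going to the stack. The macros below are local stand-ins pinned to the AAPCS values, and the empty-struct special case is omitted:

#include <stdbool.h>

#define NUM_ARG_REGS   4
#define UNITS_PER_WORD 4

/* Returns the first register allocated (-1 for "entirely on the stack")
   and reports any bytes that spilled onto the stack via *PARTIAL_BYTES.  */
static int
layout_core_arg (int *ncrn, int nregs, bool needs_dword_align,
                 bool can_split, int *partial_bytes)
{
  *partial_bytes = 0;

  /* C3: double-word aligned arguments start at an even register.  */
  if ((*ncrn & 1) && needs_dword_align)
    (*ncrn)++;

  /* C4: the argument fits entirely in core registers.  */
  if (*ncrn + nregs <= NUM_ARG_REGS)
    {
      int reg = *ncrn;
      *ncrn += nregs;
      return reg;
    }

  /* C5: split between the remaining registers and the stack.  */
  if (*ncrn < NUM_ARG_REGS && can_split)
    {
      int reg = *ncrn;
      *partial_bytes = (NUM_ARG_REGS - *ncrn) * UNITS_PER_WORD;
      *ncrn = NUM_ARG_REGS;
      return reg;
    }

  /* C6-C8: no registers left; the argument goes on the stack.  */
  *ncrn = NUM_ARG_REGS;
  return -1;
}

For example, a double passed after one int (ncrn = 1, nregs = 2, double-word aligned) is bumped to r2 and occupies r2-r3; the next argument then starts on the stack.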
*/ + if (fntype) + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); + else + pcum->pcs_variant = arm_pcs_default; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + if (arm_libcall_uses_aapcs_base (libname)) + pcum->pcs_variant = ARM_PCS_AAPCS; + + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + pcum->aapcs_arg_processed = false; + pcum->aapcs_cprc_slot = -1; + pcum->can_split = true; + + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + { + pcum->aapcs_cprc_failed[i] = false; + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); + } + } + return; + } + + /* Legacy ABIs */ + + /* On the ARM, the offset starts at 0. */ + pcum->nregs = 0; + pcum->iwmmxt_nregs = 0; + pcum->can_split = true; + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + pcum->named_count = 0; + pcum->nargs = 0; + + if (TARGET_REALLY_IWMMXT && fntype) + { + tree fn_arg; + + for (fn_arg = TYPE_ARG_TYPES (fntype); + fn_arg; + fn_arg = TREE_CHAIN (fn_arg)) + pcum->named_count += 1; + + if (! pcum->named_count) + pcum->named_count = INT_MAX; + } +} + +/* Return true if we use LRA instead of reload pass. */ +static bool +arm_lra_p (void) +{ + return arm_lra_flag; +} + +/* Return true if mode/type need doubleword alignment. */ +static bool +arm_needs_doubleword_align (enum machine_mode mode, const_tree type) +{ + return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY + || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); +} + + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On the ARM, normally the first 16 bytes are passed in registers r0-r3; all + other arguments are passed on the stack. If (NAMED == 0) (which happens + only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is + defined), say it is passed in the stack (function_prologue will + indeed make it pass in the stack if necessary). */ + +static rtx +arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs; + + /* Handle the special case quickly. Pick an arbitrary value for op2 of + a call insn (op3 of a call_value insn). */ + if (mode == VOIDmode) + return const0_rtx; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_reg; + } + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + if (TARGET_IWMMXT_ABI + && arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + 1) + { + if (pcum->iwmmxt_nregs <= 9) + return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM); + else + { + pcum->can_split = false; + return NULL_RTX; + } + } + + /* Put doubleword aligned quantities in even register pairs. 
*/ + if (pcum->nregs & 1 + && ARM_DOUBLEWORD_ALIGN + && arm_needs_doubleword_align (mode, type)) + pcum->nregs++; + + /* Only allow splitting an arg between regs and memory if all preceding + args were allocated to regs. For args passed by reference we only count + the reference pointer. */ + if (pcum->can_split) + nregs = 1; + else + nregs = ARM_NUM_REGS2 (mode, type); + + if (!named || pcum->nregs + nregs > NUM_ARG_REGS) + return NULL_RTX; + + return gen_rtx_REG (mode, pcum->nregs); +} + +static unsigned int +arm_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type) + ? DOUBLEWORD_ALIGNMENT + : PARM_BOUNDARY); +} + +static int +arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode, + tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs = pcum->nregs; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_partial; + } + + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) + return 0; + + if (NUM_ARG_REGS > nregs + && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type)) + && pcum->can_split) + return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; + + return 0; +} + +/* Update the data in PCUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + + if (pcum->aapcs_cprc_slot >= 0) + { + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, + type); + pcum->aapcs_cprc_slot = -1; + } + + /* Generic stuff. */ + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + } + else + { + pcum->nargs += 1; + if (arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + && TARGET_IWMMXT_ABI) + pcum->iwmmxt_nregs += 1; + else + pcum->nregs += ARM_NUM_REGS2 (mode, type); + } +} + +/* Variable sized types are passed by reference. This is a GCC + extension to the ARM ABI. */ + +static bool +arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Encode the current state of the #pragma [no_]long_calls. */ +typedef enum +{ + OFF, /* No #pragma [no_]long_calls is in effect. */ + LONG, /* #pragma long_calls is in effect. */ + SHORT /* #pragma no_long_calls is in effect. */ +} arm_pragma_enum; + +static arm_pragma_enum arm_pragma_long_calls = OFF; + +void +arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = LONG; +} + +void +arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = SHORT; +} + +void +arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = OFF; +} + +/* Handle an attribute requiring a FUNCTION_DECL; + arguments as in struct attribute_spec.handler. 
*/ +static tree +arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle an "interrupt" or "isr" attribute; + arguments as in struct attribute_spec.handler. */ +static tree +arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + /* FIXME: the argument if any is checked for type attributes; + should it be checked for decl ones? */ + } + else + { + if (TREE_CODE (*node) == FUNCTION_TYPE + || TREE_CODE (*node) == METHOD_TYPE) + { + if (arm_isr_value (args) == ARM_FT_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + else if (TREE_CODE (*node) == POINTER_TYPE + && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE + || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) + && arm_isr_value (args) != ARM_FT_UNKNOWN) + { + *node = build_variant_type_copy (*node); + TREE_TYPE (*node) = build_type_attribute_variant + (TREE_TYPE (*node), + tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); + *no_add_attrs = true; + } + else + { + /* Possibly pass this attribute on from the type to a decl. */ + if (flags & ((int) ATTR_FLAG_DECL_NEXT + | (int) ATTR_FLAG_FUNCTION_NEXT + | (int) ATTR_FLAG_ARRAY_NEXT)) + { + *no_add_attrs = true; + return tree_cons (name, args, NULL_TREE); + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + } + } + } + + return NULL_TREE; +} + +/* Handle a "pcs" attribute; arguments as in struct + attribute_spec.handler. */ +static tree +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +/* Handle the "notshared" attribute. This attribute is another way of + requesting hidden visibility. ARM's compiler supports + "__declspec(notshared)"; we support the same thing via an + attribute. */ + +static tree +arm_handle_notshared_attribute (tree *node, + tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree decl = TYPE_NAME (*node); + + if (decl) + { + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (decl) = 1; + *no_add_attrs = false; + } + return NULL_TREE; +} +#endif + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). */ +static int +arm_comp_type_attributes (const_tree type1, const_tree type2) +{ + int l1, l2, s1, s2; + + /* Check for mismatch of non-default calling convention. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched call attributes. 
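The mismatched-call-attribute checks that follow reduce to four booleans: whether each type carries long_call and/or short_call. A stand-alone restatement of that logic, with attribute lookups reduced to flags:

#include <stdbool.h>

/* 1 = compatible, 0 = incompatible.  */
static int
call_attrs_compatible (bool l1, bool s1, bool l2, bool s2)
{
  if (l1 || l2 || s1 || s2)
    {
      /* Both types must carry exactly the same attributes...  */
      if (l1 != l2 || s1 != s2)
        return 0;
      /* ...and long_call must never meet short_call.  */
      if ((l1 && s2) || (l2 && s1))
        return 0;
    }
  return 1;
}

Separately, in the ISR mismatch check further down, the fallback lookup of type2's "interrupt" attribute appears to store its result into l1 rather than l2, which looks unintended; the sketch above models only the long_call/short_call part.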
*/ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | s1 | s2) + { + /* If one type has an attribute, the other must have the same attribute. */ + if ((l1 != l2) || (s1 != s2)) + return 0; + + /* Disallow mixed attributes. */ + if ((l1 & s2) || (l2 & s1)) + return 0; + } + + /* Check for mismatched ISR attribute. */ + l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL; + if (! l1) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL; + if (! l2) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL; + if (l1 != l2) + return 0; + + return 1; +} + +/* Assigns default attributes to newly defined type. This is used to + set short_call/long_call attributes for function types of + functions defined inside corresponding #pragma scopes. */ +static void +arm_set_default_type_attributes (tree type) +{ + /* Add __attribute__ ((long_call)) to all functions, when + inside #pragma long_calls or __attribute__ ((short_call)), + when inside #pragma no_long_calls. */ + if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) + { + tree type_attr_list, attr_name; + type_attr_list = TYPE_ATTRIBUTES (type); + + if (arm_pragma_long_calls == LONG) + attr_name = get_identifier ("long_call"); + else if (arm_pragma_long_calls == SHORT) + attr_name = get_identifier ("short_call"); + else + return; + + type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); + TYPE_ATTRIBUTES (type) = type_attr_list; + } +} + +/* Return true if DECL is known to be linked into section SECTION. */ + +static bool +arm_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about functions defined in the same + compilation unit. */ + if (!TREE_STATIC (decl)) + return false; + + /* Make sure that SYMBOL always binds to the definition in this + compilation unit. */ + if (!targetm.binds_local_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_ONE_ONLY (decl)) + return false; + } + + return function_section (decl) == section; +} + +/* Return nonzero if a 32-bit "long_call" should be generated for + a call from the current function to DECL. We generate a long_call + if the function: + + a. has an __attribute__((long call)) + or b. is within the scope of a #pragma long_calls + or c. the -mlong-calls command line switch has been specified + + However we do not generate a long call if the function: + + d. has an __attribute__ ((short_call)) + or e. is inside the scope of a #pragma no_long_calls + or f. is defined in the same section as the current function. */ + +bool +arm_is_long_call_p (tree decl) +{ + tree attrs; + + if (!decl) + return TARGET_LONG_CALLS; + + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("short_call", attrs)) + return false; + + /* For "f", be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. 
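The long-call decision documented here (rules a-f) has a fixed precedence: an explicit short_call wins, then same-section locality, then an explicit long_call, then the -mlong-calls default. As a boolean sketch, with the NULL-decl and block-partitioning details folded into the flags:

#include <stdbool.h>

static bool
is_long_call (bool has_short_call_attr, bool same_section,
              bool has_long_call_attr, bool target_long_calls)
{
  if (has_short_call_attr)
    return false;
  if (same_section)
    return false;
  if (has_long_call_attr)
    return true;
  return target_long_calls;
}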
*/ + if (!flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && arm_function_in_section_p (decl, current_function_section ())) + return false; + + if (lookup_attribute ("long_call", attrs)) + return true; + + return TARGET_LONG_CALLS; +} + +/* Return nonzero if it is ok to make a tail-call to DECL. */ +static bool +arm_function_ok_for_sibcall (tree decl, tree exp) +{ + unsigned long func_type; + + if (cfun->machine->sibcall_blocked) + return false; + + /* Never tailcall something if we are generating code for Thumb-1. */ + if (TARGET_THUMB1) + return false; + + /* The PIC register is live on entry to VxWorks PLT entries, so we + must make the call before restoring the PIC register. */ + if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) + return false; + + /* Cannot tail-call to long calls, since these are out of range of + a branch instruction. */ + if (decl && arm_is_long_call_p (decl)) + return false; + + /* If we are interworking and the function is not declared static + then we can't tail-call it unless we know that it exists in this + compilation unit (since it might be a Thumb routine). */ + if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl) + && !TREE_ASM_WRITTEN (decl)) + return false; + + func_type = arm_current_func_type (); + /* Never tailcall from an ISR routine - it needs a special exit sequence. */ + if (IS_INTERRUPT (func_type)) + return false; + + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a VFP register but then need to transfer it to a core + register. */ + rtx a, b; + + a = arm_function_value (TREE_TYPE (exp), decl, false); + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (!rtx_equal_p (a, b)) + return false; + } + + /* Never tailcall if function may be called with a misaligned SP. */ + if (IS_STACKALIGN (func_type)) + return false; + + /* The AAPCS says that, on bare-metal, calls to unresolved weak + references should become a NOP. Don't convert such calls into + sibling calls. */ + if (TARGET_AAPCS_BASED + && arm_abi == ARM_ABI_AAPCS + && decl + && DECL_WEAK (decl)) + return false; + + /* Everything else is ok. */ + return true; +} + + +/* Addressing mode support functions. */ + +/* Return nonzero if X is a legitimate immediate operand when compiling + for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ +int +legitimate_pic_operand_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + return 0; + + return 1; +} + +/* Record that the current function needs a PIC register. Initialize + cfun->machine->pic_reg if we have not already done so. */ + +static void +require_pic_register (void) +{ + /* A lot of the logic here is made obscure by the fact that this + routine gets called as part of the rtx cost estimation process. + We don't want those calls to affect any assumptions about the real + function; and further, we can't call entry_of_function() until we + start the real expansion process. 
*/ + if (!crtl->uses_pic_offset_table) + { + gcc_assert (can_create_pseudo_p ()); + if (arm_pic_register != INVALID_REGNUM + && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM)) + { + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + } + else + { + rtx seq, insn; + + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_reg_rtx (Pmode); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + { + crtl->uses_pic_offset_table = 1; + start_sequence (); + + if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM + && arm_pic_register > LAST_LO_REGNUM) + emit_move_insn (cfun->machine->pic_reg, + gen_rtx_REG (Pmode, arm_pic_register)); + else + arm_load_pic_register (0UL); + + seq = get_insns (); + end_sequence (); + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + INSN_LOCATION (insn) = prologue_location; + + /* We can be called during expansion of PHI nodes, where + we can't yet emit instructions directly in the final + insn stream. Queue the insns on the entry edge, they will + be committed after everything else is expanded. */ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + } + } + } +} + +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx insn; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + /* VxWorks does not impose a fixed gap between segments; the run-time + gap can be different from the object-file gap. We therefore can't + use GOTOFF unless we are absolutely sure that the symbol is in the + same segment as the GOT. Unfortunately, the flexibility of linker + scripts means that we can't be sure of that in general, so assume + that GOTOFF is never valid on VxWorks. */ + if ((GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF && + SYMBOL_REF_LOCAL_P (orig))) + && NEED_GOT_RELOC + && arm_pic_data_is_text_relative) + insn = arm_pic_static_addr (orig, reg); + else + { + rtx pat; + rtx mem; + + /* If this function doesn't have a pic register, create one now. */ + require_pic_register (); + + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); + + /* Make the MEM as close to a constant as possible. */ + mem = SET_SRC (pat); + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + + insn = emit_insn (pat); + } + + /* Put a REG_EQUAL note on this insn, so that it can be optimized + by loop. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg) + return orig; + + /* Handle the case where we have: const (UNSPEC_TLS). */ + if (GET_CODE (XEXP (orig, 0)) == UNSPEC + && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS) + return orig; + + /* Handle the case where we have: + const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a + CONST_INT. 
*/ + if (GET_CODE (XEXP (orig, 0)) == PLUS + && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC + && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS) + { + gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1))); + return orig; + } + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + if (CONST_INT_P (offset)) + { + /* The base register doesn't really matter, we only want to + test the index for the appropriate mode. */ + if (!arm_legitimate_index_p (mode, offset, SET, 0)) + { + gcc_assert (can_create_pseudo_p ()); + offset = force_reg (Pmode, offset); + } + + if (CONST_INT_P (offset)) + return plus_constant (Pmode, base, INTVAL (offset)); + } + + if (GET_MODE_SIZE (mode) > 4 + && (GET_MODE_CLASS (mode) == MODE_INT + || TARGET_SOFT_FLOAT)) + { + emit_insn (gen_addsi3 (reg, base, offset)); + return reg; + } + + return gen_rtx_PLUS (Pmode, base, offset); + } + + return orig; +} + + +/* Find a spare register to use during the prolog of a function. */ + +static int +thumb_find_work_register (unsigned long pushed_regs_mask) +{ + int reg; + + /* Check the argument registers first as these are call-used. The + register allocation order means that sometimes r3 might be used + but earlier argument registers might not, so check them all. */ + for (reg = LAST_ARG_REGNUM; reg >= 0; reg --) + if (!df_regs_ever_live_p (reg)) + return reg; + + /* Before going on to check the call-saved registers we can try a couple + more ways of deducing that r3 is available. The first is when we are + pushing anonymous arguments onto the stack and we have less than 4 + registers worth of fixed arguments(*). In this case r3 will be part of + the variable argument list and so we can be sure that it will be + pushed right at the start of the function. Hence it will be available + for the rest of the prologue. + (*): ie crtl->args.pretend_args_size is greater than 0. */ + if (cfun->machine->uses_anonymous_args + && crtl->args.pretend_args_size > 0) + return LAST_ARG_REGNUM; + + /* The other case is when we have fixed arguments but less than 4 registers + worth. In this case r3 might be used in the body of the function, but + it is not being used to convey an argument into the function. In theory + we could just check crtl->args.size to see how many bytes are + being passed in argument registers, but it seems that it is unreliable. + Sometimes it will have the value 0 when in fact arguments are being + passed. (See testcase execute/20021111-1.c for an example). So we also + check the args_info.nregs field as well. The problem with this field is + that it makes no allowances for arguments that are passed to the + function but which are not used. Hence we could miss an opportunity + when a function has an unused argument in r3. But it is better to be + safe than to be sorry. */ + if (! cfun->machine->uses_anonymous_args + && crtl->args.size >= 0 + && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) + && (TARGET_AAPCS_BASED + ? crtl->args.info.aapcs_ncrn < 4 + : crtl->args.info.nregs < 4)) + return LAST_ARG_REGNUM; + + /* Otherwise look for a call-saved register that is going to be pushed. */ + for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) + if (pushed_regs_mask & (1 << reg)) + return reg; + + if (TARGET_THUMB2) + { + /* Thumb-2 can use high regs. 
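thumb_find_work_register above is essentially two bitmask scans: first the call-clobbered argument registers that are not live, then any low call-saved register the prologue is about to push anyway. A stripped-down sketch with the r3 heuristics and the Thumb-2 high-register fallback left out (LIVE and PUSHED are register bitmasks):

static int
find_work_register (unsigned live, unsigned pushed)
{
  for (int reg = 3; reg >= 0; reg--)     /* argument registers r3..r0 */
    if (!(live & (1u << reg)))
      return reg;

  for (int reg = 7; reg > 3; reg--)      /* low call-saved registers r7..r4 */
    if (pushed & (1u << reg))
      return reg;

  return -1;                             /* the real code asserts here */
}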
*/ + for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) + if (pushed_regs_mask & (1 << reg)) + return reg; + } + /* Something went wrong - thumb_compute_save_reg_mask() + should have arranged for a suitable register to be pushed. */ + gcc_unreachable (); +} + +static GTY(()) int pic_labelno; + +/* Generate code to load the PIC register. In thumb mode SCRATCH is a + low register. */ + +void +arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) +{ + rtx l1, labelno, pic_tmp, pic_rtx, pic_reg; + + if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE) + return; + + gcc_assert (flag_pic); + + pic_reg = cfun->machine->pic_reg; + if (TARGET_VXWORKS_RTP) + { + pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); + + emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); + + pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); + } + else + { + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); + pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), + UNSPEC_GOTSYM_OFF); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + + if (TARGET_32BIT) + { + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + else /* TARGET_THUMB1 */ + { + if (arm_pic_register != INVALID_REGNUM + && REGNO (pic_reg) > LAST_LO_REGNUM) + { + /* We will have pushed the pic register, so we should always be + able to find a work register. */ + pic_tmp = gen_rtx_REG (SImode, + thumb_find_work_register (saved_regs)); + emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx)); + emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp)); + emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); + } + else if (arm_pic_register != INVALID_REGNUM + && arm_pic_register > LAST_LO_REGNUM + && REGNO (pic_reg) <= LAST_LO_REGNUM) + { + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg); + emit_use (gen_rtx_REG (Pmode, arm_pic_register)); + } + else + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + } + + /* Need to emit this whether or not we obey regdecls, + since setjmp/longjmp can cause life info to screw up. */ + emit_use (pic_reg); +} + +/* Generate code to load the address of a static var when flag_pic is set. */ +static rtx +arm_pic_static_addr (rtx orig, rtx reg) +{ + rtx l1, labelno, offset_rtx, insn; + + gcc_assert (flag_pic); + + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 
8 : 4); + offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx), + UNSPEC_SYMBOL_OFFSET); + offset_rtx = gen_rtx_CONST (Pmode, offset_rtx); + + insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno)); + return insn; +} + +/* Return nonzero if X is valid as an ARM state addressing register. */ +static int +arm_address_register_rtx_p (rtx x, int strict_p) +{ + int regno; + + if (!REG_P (x)) + return 0; + + regno = REGNO (x); + + if (strict_p) + return ARM_REGNO_OK_FOR_BASE_P (regno); + + return (regno <= LAST_ARM_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +} + +/* Return TRUE if this rtx is the difference of a symbol and a label, + and will reduce to a PC-relative relocation in the object file. + Expressions like this can be left alone when generating PIC, rather + than forced through the GOT. */ +static int +pcrel_constant_p (rtx x) +{ + if (GET_CODE (x) == MINUS) + return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); + + return FALSE; +} + +/* Return true if X will surely end up in an index register after next + splitting pass. */ +static bool +will_be_in_index_register (const_rtx x) +{ + /* arm.md: calculate_pic_address will split this into a register. */ + return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM); +} + +/* Return nonzero if X is a valid ARM state address operand. */ +int +arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, + int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + rtx addend = XEXP (XEXP (x, 1), 1); + + /* Don't allow ldrd post increment by register because it's hard + to fixup invalid register choices. */ + if (use_ldrd + && GET_CODE (x) == POST_MODIFY + && REG_P (addend)) + return 0; + + return ((use_ldrd || GET_MODE_SIZE (mode) <= 4) + && arm_legitimate_index_p (mode, addend, outer, strict_p)); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. 
*/ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && ((CONST_INT_P (xop1) + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && arm_legitimate_index_p (mode, xop0, outer, strict_p))); + } + +#if 0 + /* Reload currently can't handle MINUS, so disable this for now */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return (arm_address_register_rtx_p (xop0, strict_p) + && arm_legitimate_index_p (mode, xop1, outer, strict_p)); + } +#endif + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if X is a valid Thumb-2 address operand. */ +static int +thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + /* Thumb-2 only has autoincrement by constant. */ + rtx addend = XEXP (XEXP (x, 1), 1); + HOST_WIDE_INT offset; + + if (!CONST_INT_P (addend)) + return 0; + + offset = INTVAL(addend); + if (GET_MODE_SIZE (mode) <= 4) + return (offset > -256 && offset < 256); + + return (use_ldrd && offset > -1024 && offset < 1024 + && (offset & 3) == 0); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. */ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && (thumb2_legitimate_index_p (mode, xop1, strict_p) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && thumb2_legitimate_index_p (mode, xop0, strict_p))); + } + + /* Normally we can assign constant values to target registers without + the help of constant pool. But there are cases we have to use constant + pool like: + 1) assign a label to register. + 2) sign-extend a 8bit value to 32bit and then assign to register. + + Constant pool access in format: + (set (reg r0) (mem (symbol_ref (".LC0")))) + will cause the use of literal pool (later in function arm_reorg). 
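The Thumb-2 auto-increment rule in this function comes down to an offset-range check that depends only on the access size and whether LDRD is usable. Restated as a stand-alone predicate, with the machine mode reduced to its size in bytes:

#include <stdbool.h>

static bool
thumb2_autoinc_offset_ok (long offset, int mode_size, bool use_ldrd)
{
  if (mode_size <= 4)
    return offset > -256 && offset < 256;

  return use_ldrd && offset > -1024 && offset < 1024 && (offset & 3) == 0;
}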
+ So here we mark such format as an invalid format, then the compiler + will adjust it into: + (set (reg r0) (symbol_ref (".LC0"))) + (set (reg r0) (mem (reg r0))). + No extra register is required, and (mem (reg r0)) won't cause the use + of literal pools. */ + else if (arm_disable_literal_pool && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x)) + return 0; + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if INDEX is valid for an address index operand in + ARM state. */ +static int +arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, + int strict_p) +{ + HOST_WIDE_INT range; + enum rtx_code code = GET_CODE (index); + + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + return (code == CONST_INT && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. */ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + + if (TARGET_LDRD) + return val > -256 && val < 256; + else + return val > -4096 && val < 4092; + } + + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); + } + + if (GET_MODE_SIZE (mode) <= 4 + && ! (arm_arch4 + && (mode == HImode + || mode == HFmode + || (mode == QImode && outer == SIGN_EXTEND)))) + { + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && power_of_two_operand (xiop1, SImode)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && power_of_two_operand (xiop0, SImode))); + } + else if (code == LSHIFTRT || code == ASHIFTRT + || code == ASHIFT || code == ROTATERT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && CONST_INT_P (op) + && INTVAL (op) > 0 + && INTVAL (op) <= 31); + } + } + + /* For ARM v4 we may be doing a sign-extend operation during the + load. */ + if (arm_arch4) + { + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) + range = 256; + else + range = 4096; + } + else + range = (mode == HImode || mode == HFmode) ? 
4095 : 4096; + + return (code == CONST_INT + && INTVAL (index) < range + && INTVAL (index) > -range); +} + +/* Return true if OP is a valid index scaling factor for Thumb-2 address + index operand. i.e. 1, 2, 4 or 8. */ +static bool +thumb2_index_mul_operand (rtx op) +{ + HOST_WIDE_INT val; + + if (!CONST_INT_P (op)) + return false; + + val = INTVAL(op); + return (val == 1 || val == 2 || val == 4 || val == 8); +} + +/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ +static int +thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p) +{ + enum rtx_code code = GET_CODE (index); + + /* ??? Combine arm and thumb2 coprocessor addressing modes. */ + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + return (code == CONST_INT && INTVAL (index) < 1024 + /* Thumb-2 allows only > -256 index range for it's core register + load/stores. Since we allow SF/DF in core registers, we have + to use the intersection between -256~4096 (core) and -1024~1024 + (coprocessor). */ + && INTVAL (index) > -256 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + { + /* For DImode assume values will usually live in core regs + and only allow LDRD addressing modes. */ + if (!TARGET_LDRD || mode != DImode) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + } + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. */ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + /* ??? Can we assume ldrd for thumb2? */ + /* Thumb-2 ldrd only has reg+const addressing modes. */ + /* ldrd supports offsets of +-1020. + However the ldr fallback does not. */ + return val > -256 && val < 256 && (val & 3) == 0; + } + else + return 0; + } + + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && thumb2_index_mul_operand (xiop1)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && thumb2_index_mul_operand (xiop0))); + } + else if (code == ASHIFT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && CONST_INT_P (op) + && INTVAL (op) > 0 + && INTVAL (op) <= 3); + } + + return (code == CONST_INT + && INTVAL (index) < 4096 + && INTVAL (index) > -256); +} + +/* Return nonzero if X is valid as a 16-bit Thumb state base register. 
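/* Illustrative sketch (standalone; not part of arm.c, helper names are
   hypothetical) of the quad-mode offset rule above: a Q-register access
   at OFFSET is decomposed into two D-register accesses at OFFSET and
   OFFSET + 8, and each half must satisfy the D-register constraint
   (-1024, 1024) with 4-byte alignment, which is why the Q-register
   bound tightens to 1016.  */
#include <assert.h>

static int
dreg_offset_ok (long off)
{
  return off > -1024 && off < 1024 && (off & 3) == 0;
}

static int
qreg_offset_ok (long off)
{
  return dreg_offset_ok (off) && dreg_offset_ok (off + 8);
}

int
main (void)
{
  assert (qreg_offset_ok (1008));   /* both halves (1008, 1016) fit  */
  assert (!qreg_offset_ok (1016));  /* second half would be at 1024  */
  return 0;
}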
*/ +static int +thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p) +{ + int regno; + + if (!REG_P (x)) + return 0; + + regno = REGNO (x); + + if (strict_p) + return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); + + return (regno <= LAST_LO_REGNUM + || regno > LAST_VIRTUAL_REGISTER + || regno == FRAME_POINTER_REGNUM + || (GET_MODE_SIZE (mode) >= 4 + && (regno == STACK_POINTER_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || x == hard_frame_pointer_rtx + || x == arg_pointer_rtx))); +} + +/* Return nonzero if x is a legitimate index register. This is the case + for any base register that can access a QImode object. */ +inline static int +thumb1_index_register_rtx_p (rtx x, int strict_p) +{ + return thumb1_base_register_rtx_p (x, QImode, strict_p); +} + +/* Return nonzero if x is a legitimate 16-bit Thumb-state address. + + The AP may be eliminated to either the SP or the FP, so we use the + least common denominator, e.g. SImode, and offsets from 0 to 64. + + ??? Verify whether the above is the right approach. + + ??? Also, the FP may be eliminated to the SP, so perhaps that + needs special handling also. + + ??? Look at how the mips16 port solves this problem. It probably uses + better ways to solve some of these problems. + + Although it is not incorrect, we don't accept QImode and HImode + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +int +thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + /* ??? Not clear if this is right. Experiment. */ + if (GET_MODE_SIZE (mode) < 4 + && !(reload_in_progress || reload_completed) + && (reg_mentioned_p (frame_pointer_rtx, x) + || reg_mentioned_p (arg_pointer_rtx, x) + || reg_mentioned_p (virtual_incoming_args_rtx, x) + || reg_mentioned_p (virtual_outgoing_args_rtx, x) + || reg_mentioned_p (virtual_stack_dynamic_rtx, x) + || reg_mentioned_p (virtual_stack_vars_rtx, x))) + return 0; + + /* Accept any base register. SP only in SImode or larger. */ + else if (thumb1_base_register_rtx_p (x, mode, strict_p)) + return 1; + + /* This is PC relative data before arm_reorg runs. */ + else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) + return 1; + + /* This is PC relative data after arm_reorg runs. */ + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + /* Post-inc indexing only supported for SImode and larger. */ + else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4 + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)) + return 1; + + else if (GET_CODE (x) == PLUS) + { + /* REG+REG address can be any two index registers. */ + /* We disallow FRAME+REG addressing since we know that FRAME + will be replaced with STACK, and SP relative addressing only + permits SP+OFFSET. */ + if (GET_MODE_SIZE (mode) <= 4 + && XEXP (x, 0) != frame_pointer_rtx + && XEXP (x, 1) != frame_pointer_rtx + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) + return 1; + + /* REG+const has 5-7 bit offset for non-SP registers. 
*/ + else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + || XEXP (x, 0) == arg_pointer_rtx) + && CONST_INT_P (XEXP (x, 1)) + && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + return 1; + + /* REG+const has 10-bit offset for SP, but only SImode and + larger is supported. */ + /* ??? Should probably check for DI/DFmode overflow here + just like GO_IF_LEGITIMATE_OFFSET does. */ + else if (REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM + && GET_MODE_SIZE (mode) >= 4 + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) >= 0 + && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024 + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + + else if (REG_P (XEXP (x, 0)) + && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM + || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM + || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER + && REGNO (XEXP (x, 0)) + <= LAST_VIRTUAL_POINTER_REGISTER)) + && GET_MODE_SIZE (mode) >= 4 + && CONST_INT_P (XEXP (x, 1)) + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + } + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && GET_MODE_SIZE (mode) == 4 + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if VAL can be used as an offset in a Thumb-state address + instruction of mode MODE. */ +int +thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val) +{ + switch (GET_MODE_SIZE (mode)) + { + case 1: + return val >= 0 && val < 32; + + case 2: + return val >= 0 && val < 64 && (val & 1) == 0; + + default: + return (val >= 0 + && (val + GET_MODE_SIZE (mode)) <= 128 + && (val & 3) == 0); + } +} + +bool +arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p) +{ + if (TARGET_ARM) + return arm_legitimate_address_outer_p (mode, x, SET, strict_p); + else if (TARGET_THUMB2) + return thumb2_legitimate_address_p (mode, x, strict_p); + else /* if (TARGET_THUMB1) */ + return thumb1_legitimate_address_p (mode, x, strict_p); +} + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. + + Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS, but for the Thumb core registers and + immediate constants we prefer a LO_REGS class or a subset. */ + +static reg_class_t +arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) +{ + if (TARGET_32BIT) + return rclass; + else + { + if (rclass == GENERAL_REGS) + return LO_REGS; + else + return rclass; + } +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +rtx +arm_load_tp (rtx target) +{ + if (!target) + target = gen_reg_rtx (SImode); + + if (TARGET_HARD_TP) + { + /* Can return in any reg. */ + emit_insn (gen_load_tp_hard (target)); + } + else + { + /* Always returned in r0. Immediately copy the result into a pseudo, + otherwise other uses of r0 (e.g. setting up function arguments) may + clobber the value. 
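/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the Thumb-1 REG+const limits that
   thumb_legitimate_offset_p above encodes: the immediate scales with
   the access size, giving byte offsets 0..31, halfword offsets 0..62
   (even) and word offsets 0..124 (multiples of 4, so the whole access
   ends by byte 128).  */
#include <assert.h>

static int
thumb1_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1:
      return val >= 0 && val < 32;
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;
    default:
      return val >= 0 && val + size <= 128 && (val & 3) == 0;
    }
}

int
main (void)
{
  assert (thumb1_offset_ok (1, 31) && !thumb1_offset_ok (1, 32));
  assert (thumb1_offset_ok (2, 62) && !thumb1_offset_ok (2, 63));
  assert (thumb1_offset_ok (4, 124) && !thumb1_offset_ok (4, 128));
  return 0;
}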
*/ + + rtx tmp; + + emit_insn (gen_load_tp_soft ()); + + tmp = gen_rtx_REG (SImode, 0); + emit_move_insn (target, tmp); + } + return target; +} + +static rtx +load_tls_operand (rtx x, rtx reg) +{ + rtx tmp; + + if (reg == NULL_RTX) + reg = gen_reg_rtx (SImode); + + tmp = gen_rtx_CONST (SImode, x); + + emit_move_insn (reg, tmp); + + return reg; +} + +static rtx +arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) +{ + rtx insns, label, labelno, sum; + + gcc_assert (reloc != TLS_DESCSEQ); + start_sequence (); + + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (reloc), label, + GEN_INT (TARGET_ARM ? 8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); + else + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + + *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, + LCT_PURE, /* LCT_CONST? */ + Pmode, 1, reg, Pmode); + + insns = get_insns (); + end_sequence (); + + return insns; +} + +static rtx +arm_tls_descseq_addr (rtx x, rtx reg) +{ + rtx labelno = GEN_INT (pic_labelno++); + rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ), + gen_rtx_CONST (VOIDmode, label), + GEN_INT (!TARGET_ARM)), + UNSPEC_TLS); + rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0)); + + emit_insn (gen_tlscall (x, labelno)); + if (!reg) + reg = gen_reg_rtx (SImode); + else + gcc_assert (REGNO (reg) != 0); + + emit_move_insn (reg, reg0); + + return reg; +} + +rtx +legitimize_tls_address (rtx x, rtx reg) +{ + rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend; + unsigned int model = SYMBOL_REF_TLS_MODEL (x); + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + if (TARGET_GNU2_TLS) + { + reg = arm_tls_descseq_addr (x, reg); + + tp = arm_load_tp (NULL_RTX); + + dest = gen_rtx_PLUS (Pmode, tp, reg); + } + else + { + /* Original scheme */ + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, x); + } + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + if (TARGET_GNU2_TLS) + { + reg = arm_tls_descseq_addr (x, reg); + + tp = arm_load_tp (NULL_RTX); + + dest = gen_rtx_PLUS (Pmode, tp, reg); + } + else + { + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32); + + /* Attach a unique REG_EQUIV, to allow the RTL optimizers to + share the LDM result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), + UNSPEC_TLS); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, eqv); + + /* Load the addend. */ + addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, + GEN_INT (TLS_LDO32)), + UNSPEC_TLS); + addend = force_reg (SImode, gen_rtx_CONST (SImode, addend)); + dest = gen_rtx_PLUS (Pmode, dest, addend); + } + return dest; + + case TLS_MODEL_INITIAL_EXEC: + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (TLS_IE32), label, + GEN_INT (TARGET_ARM ? 
8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) + emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); + else + { + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + emit_move_insn (reg, gen_const_mem (SImode, reg)); + } + + tp = arm_load_tp (NULL_RTX); + + return gen_rtx_PLUS (Pmode, tp, reg); + + case TLS_MODEL_LOCAL_EXEC: + tp = arm_load_tp (NULL_RTX); + + reg = gen_rtx_UNSPEC (Pmode, + gen_rtvec (2, x, GEN_INT (TLS_LE32)), + UNSPEC_TLS); + reg = force_reg (SImode, gen_rtx_CONST (SImode, reg)); + + return gen_rtx_PLUS (Pmode, tp, reg); + + default: + abort (); + } +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ +rtx +arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (arm_tls_referenced_p (x)) + { + rtx addend = NULL; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + addend = XEXP (XEXP (x, 0), 1); + x = XEXP (XEXP (x, 0), 0); + } + + if (GET_CODE (x) != SYMBOL_REF) + return x; + + gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0); + + x = legitimize_tls_address (x, NULL_RTX); + + if (addend) + { + x = gen_rtx_PLUS (SImode, x, addend); + orig_x = x; + } + else + return x; + } + + if (!TARGET_ARM) + { + /* TODO: legitimize_address for Thumb2. */ + if (TARGET_THUMB2) + return x; + return thumb_legitimize_address (x, orig_x, mode); + } + + if (GET_CODE (x) == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && !CONST_INT_P (xop1) + && !symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (ARM_BASE_REGISTER_RTX_P (xop0) + && CONST_INT_P (xop1)) + { + HOST_WIDE_INT n, low_n; + rtx base_reg, val; + n = INTVAL (xop1); + + /* VFP addressing modes actually allow greater offsets, but for + now we just stick with the lowest common denominator. */ + if (mode == DImode + || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) + { + low_n = n & 0x0f; + n &= ~0x0f; + if (low_n > 4) + { + n += 16; + low_n -= 16; + } + } + else + { + low_n = ((mode) == TImode ? 0 + : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff)); + n -= low_n; + } + + base_reg = gen_reg_rtx (SImode); + val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX); + emit_move_insn (base_reg, val); + x = plus_constant (Pmode, base_reg, low_n); + } + else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + + /* XXX We don't allow MINUS any more -- see comment in + arm_legitimate_address_outer_p (). */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_MINUS (SImode, xop0, xop1); + } + + /* Make sure to take full advantage of the pre-indexed addressing mode + with absolute addresses which often allows for the base register to + be factorized for multiple adjacent memory references, and it might + even allows for the mini pool to be avoided entirely. 
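/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the DImode/DFmode displacement split used in
   arm_legitimize_address above: the low nibble stays as the addressing
   offset, and when it exceeds 4 the split is biased upward so the
   residue becomes a small negative offset instead.  */
#include <assert.h>

static void
split_di_offset (long n, long *hi, long *lo)
{
  *lo = n & 0x0f;
  *hi = n & ~0x0fL;
  if (*lo > 4)
    {
      *hi += 16;
      *lo -= 16;
    }
}

int
main (void)
{
  long hi, lo;

  split_di_offset (0x123, &hi, &lo);   /* 0x123 = 0x120 + 3  */
  assert (hi == 0x120 && lo == 3);
  split_di_offset (0x12f, &hi, &lo);   /* 0x12f = 0x130 - 1  */
  assert (hi == 0x130 && lo == -1);
  return 0;
}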
*/ + else if (CONST_INT_P (x) && optimize > 0) + { + unsigned int bits; + HOST_WIDE_INT mask, base, index; + rtx base_reg; + + /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only + use a 8-bit index. So let's use a 12-bit index for SImode only and + hope that arm_gen_constant will enable ldrb to use more bits. */ + bits = (mode == SImode) ? 12 : 8; + mask = (1 << bits) - 1; + base = INTVAL (x) & ~mask; + index = INTVAL (x) & mask; + if (bit_count (base & 0xffffffff) > (32 - bits)/2) + { + /* It'll most probably be more efficient to generate the base + with more bits set and use a negative index instead. */ + base |= mask; + index -= mask; + } + base_reg = force_reg (SImode, GEN_INT (base)); + x = plus_constant (Pmode, base_reg, index); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. */ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + + +/* Try machine-dependent ways of modifying an illegitimate Thumb address + to be legitimate. If we find one, return the new, valid address. */ +rtx +thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode) + || INTVAL (XEXP (x, 1)) < 0)) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + HOST_WIDE_INT offset = INTVAL (xop1); + + /* Try and fold the offset into a biasing of the base register and + then offsetting that. Don't do this when optimizing for space + since it can cause too many CSEs. */ + if (optimize_size && offset >= 0 + && offset < 256 + 31 * GET_MODE_SIZE (mode)) + { + HOST_WIDE_INT delta; + + if (offset >= 256) + delta = offset - (256 - GET_MODE_SIZE (mode)); + else if (offset < 32 * GET_MODE_SIZE (mode) + 8) + delta = 31 * GET_MODE_SIZE (mode); + else + delta = offset & (~31 * GET_MODE_SIZE (mode)); + + xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta), + NULL_RTX); + x = plus_constant (Pmode, xop0, delta); + } + else if (offset < 0 && offset > -256) + /* Small negative offsets are best done with a subtract before the + dereference, forcing these into a register normally takes two + instructions. */ + x = force_operand (x, NULL_RTX); + else + { + /* For the remaining cases, force the constant into a register. */ + xop1 = force_reg (SImode, xop1); + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + } + else if (GET_CODE (x) == PLUS + && s_register_operand (XEXP (x, 1), SImode) + && !s_register_operand (XEXP (x, 0), SImode)) + { + rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX); + + x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1)); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. */ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + +bool +arm_legitimize_reload_address (rtx *p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. 
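/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the constant-address split above: for SImode the
   load keeps a 12-bit index and the base is materialized separately;
   if the base would need too many set bits, flip to base|mask with a
   negative index so the base immediate is cheaper to build.  The sum
   base + index is preserved either way.  */
#include <assert.h>

static void
split_abs_address (long addr, int bits, long *base, long *index)
{
  long mask = (1L << bits) - 1;

  *base = addr & ~mask;
  *index = addr & mask;
  if (__builtin_popcountl (*base & 0xffffffff) > (32 - bits) / 2)
    {
      *base |= mask;
      *index -= mask;
    }
}

int
main (void)
{
  long base, index;

  split_abs_address (0x12345678, 12, &base, &index);
  assert (base + index == 0x12345678);
  return 0;
}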
*/ + if (GET_CODE (*p) == PLUS + && GET_CODE (XEXP (*p, 0)) == PLUS + && REG_P (XEXP (XEXP (*p, 0), 0)) + && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) + && CONST_INT_P (XEXP (*p, 1))) + { + push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, + MODE_BASE_REG_CLASS (mode), GET_MODE (*p), + VOIDmode, 0, 0, opnum, (enum reload_type) type); + return true; + } + + if (GET_CODE (*p) == PLUS + && REG_P (XEXP (*p, 0)) + && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) + /* If the base register is equivalent to a constant, let the generic + code handle it. Otherwise we will run into problems if a future + reload pass decides to rematerialize the constant. */ + && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) + && CONST_INT_P (XEXP (*p, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); + HOST_WIDE_INT low, high; + + /* Detect coprocessor load/stores. */ + bool coproc_p = ((TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + || (TARGET_REALLY_IWMMXT + && VALID_IWMMXT_REG_MODE (mode)) + || (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode)))); + + /* For some conditions, bail out when lower two bits are unaligned. */ + if ((val & 0x3) != 0 + /* Coprocessor load/store indexes are 8-bits + '00' appended. */ + && (coproc_p + /* For DI, and DF under soft-float: */ + || ((mode == DImode || mode == DFmode) + /* Without ldrd, we use stm/ldm, which does not + fair well with unaligned bits. */ + && (! TARGET_LDRD + /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ + || TARGET_THUMB2)))) + return false; + + /* When breaking down a [reg+index] reload address into [(reg+high)+low], + of which the (reg+high) gets turned into a reload add insn, + we try to decompose the index into high/low values that can often + also lead to better reload CSE. + For example: + ldr r0, [r2, #4100] // Offset too large + ldr r1, [r2, #4104] // Offset too large + + is best reloaded as: + add t1, r2, #4096 + ldr r0, [t1, #4] + add t2, r2, #4096 + ldr r1, [t2, #8] + + which post-reload CSE can simplify in most cases to eliminate the + second add instruction: + add t1, r2, #4096 + ldr r0, [t1, #4] + ldr r1, [t1, #8] + + The idea here is that we want to split out the bits of the constant + as a mask, rather than as subtracting the maximum offset that the + respective type of load/store used can handle. + + When encountering negative offsets, we can still utilize it even if + the overall offset is positive; sometimes this may lead to an immediate + that can be constructed with fewer instructions. + For example: + ldr r0, [r2, #0x3FFFFC] + + This is best reloaded as: + add t1, r2, #0x400000 + ldr r0, [t1, #-4] + + The trick for spotting this for a load insn with N bits of offset + (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a + negative offset that is going to make bit N and all the bits below + it become zero in the remainder part. + + The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect + to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), + used in most cases of ARM load/store instructions. */ + +#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ + (((VAL) & ((1 << (N)) - 1)) \ + ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ + : 0) + + if (coproc_p) + { + low = SIGN_MAG_LOW_ADDR_BITS (val, 10); + + /* NEON quad-word load/stores are made of two double-word accesses, + so the valid index range is reduced by 8. Treat as 9-bit range if + we go over it. 
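/* Illustrative sketch (standalone; not part of arm.c) reproducing the
   0x3FFFFC example from the comment above with SIGN_MAG_LOW_ADDR_BITS:
   for a 12-bit sign-magnitude offset field, bit 12 of the value is
   set, so the macro chooses a small negative low part and the high
   part becomes a single easy immediate.  */
#include <assert.h>

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
  (((VAL) & ((1 << (N)) - 1))                                           \
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))       \
   : 0)

int
main (void)
{
  long val = 0x3FFFFC;
  long low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
  long high = val - low;

  assert (low == -4 && high == 0x400000);  /* add #0x400000; ldr #-4  */
  return 0;
}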
*/ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) + low = SIGN_MAG_LOW_ADDR_BITS (val, 9); + } + else if (GET_MODE_SIZE (mode) == 8) + { + if (TARGET_LDRD) + low = (TARGET_THUMB2 + ? SIGN_MAG_LOW_ADDR_BITS (val, 10) + : SIGN_MAG_LOW_ADDR_BITS (val, 8)); + else + /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) + to access doublewords. The supported load/store offsets are + -8, -4, and 4, which we try to produce here. */ + low = ((val & 0xf) ^ 0x8) - 0x8; + } + else if (GET_MODE_SIZE (mode) < 8) + { + /* NEON element load/stores do not have an offset. */ + if (TARGET_NEON_FP16 && mode == HFmode) + return false; + + if (TARGET_THUMB2) + { + /* Thumb-2 has an asymmetrical index range of (-256,4096). + Try the wider 12-bit range first, and re-try if the result + is out of range. */ + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + if (low < -255) + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); + } + else + { + if (mode == HImode || mode == HFmode) + { + if (arm_arch4) + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); + else + { + /* The storehi/movhi_bytes fallbacks can use only + [-4094,+4094] of the full ldrb/strb index range. */ + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + if (low == 4095 || low == -4095) + return false; + } + } + else + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + } + } + else + return false; + + high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) + ^ (unsigned HOST_WIDE_INT) 0x80000000) + - (unsigned HOST_WIDE_INT) 0x80000000); + /* Check for overflow or zero */ + if (low == 0 || high == 0 || (high + low != val)) + return false; + + /* Reload the high part into a base reg; leave the low part + in the mem. + Note that replacing this gen_rtx_PLUS with plus_constant is + wrong in this case because we rely on the + (plus (plus reg c1) c2) structure being preserved so that + XEXP (*p, 0) in push_reload below uses the correct term. */ + *p = gen_rtx_PLUS (GET_MODE (*p), + gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), + GEN_INT (high)), + GEN_INT (low)); + push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, + MODE_BASE_REG_CLASS (mode), GET_MODE (*p), + VOIDmode, 0, 0, opnum, (enum reload_type) type); + return true; + } + + return false; +} + +rtx +thumb_legitimize_reload_address (rtx *x_p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + rtx x = *x_p; + + if (GET_CODE (x) == PLUS + && GET_MODE_SIZE (mode) < 4 + && REG_P (XEXP (x, 0)) + && XEXP (x, 0) == stack_pointer_rtx + && CONST_INT_P (XEXP (x, 1)) + && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + /* If both registers are hi-regs, then it's better to reload the + entire expression rather than each register individually. That + only requires one reload register rather than two. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1)) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + return NULL; +} + +/* Test for various thread-local symbols. */ + +/* Helper for arm_tls_referenced_p. 
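/* Illustrative sketch (standalone; not part of arm.c) of the
   xor/subtract idiom used just above to form the high part: masking to
   32 bits, XORing with 0x80000000 and subtracting it again
   sign-extends a 32-bit quantity into a wider integer using only
   unsigned arithmetic.  */
#include <assert.h>

static long long
sign_extend_32 (unsigned long long x)
{
  return (long long) (((x & 0xffffffffULL) ^ 0x80000000ULL)
                      - 0x80000000ULL);
}

int
main (void)
{
  assert (sign_extend_32 (0x7fffffffULL) == 0x7fffffffLL);
  assert (sign_extend_32 (0xfffffffcULL) == -4LL);  /* bit 31 set  */
  return 0;
}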
*/ + +static int +arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. */ + if (GET_CODE (*x) == UNSPEC + && XINT (*x, 1) == UNSPEC_TLS) + return -1; + + return 0; +} + +/* Return TRUE if X contains any TLS symbol references. */ + +bool +arm_tls_referenced_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, arm_tls_operand_p_1, NULL); +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. + + On the ARM, allow any integer (invalid ones are removed later by insn + patterns), nice doubles and symbol_refs which refer to the function's + constant pool XXX. + + When generating pic allow anything. */ + +static bool +arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) +{ + /* At present, we have no support for Neon structure constants, so forbid + them here. It might be possible to handle simple cases like 0 and -1 + in future. */ + if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)) + return false; + + return flag_pic || !label_mentioned_p (x); +} + +static bool +thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + return (CONST_INT_P (x) + || CONST_DOUBLE_P (x) + || CONSTANT_ADDRESS_P (x) + || flag_pic); +} + +static bool +arm_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + return (!arm_cannot_force_const_mem (mode, x) + && (TARGET_32BIT + ? arm_legitimate_constant_p_1 (mode, x) + : thumb_legitimate_constant_p (mode, x))); +} + +/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +static bool +arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx base, offset; + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (x, &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + return true; + } + return arm_tls_referenced_p (x); +} + +#define REG_OR_SUBREG_REG(X) \ + (REG_P (X) \ + || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)))) + +#define REG_OR_SUBREG_RTX(X) \ + (REG_P (X) ? (X) : SUBREG_REG (X)) + +static inline int +thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + int total, words; + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); + + case PLUS: + case MINUS: + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (CONST_INT_P (XEXP (x, 1))) + { + int cycles = 0; + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + + while (i) + { + i >>= 2; + cycles++; + } + return COSTS_N_INSNS (2) + cycles; + } + return COSTS_N_INSNS (1) + 16; + + case SET: + /* A SET doesn't have a mode, so let's look at the SET_DEST to get + the mode. 
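/* Illustrative sketch (standalone; not part of arm.c, name is
   hypothetical) of the cycle estimate used above for Thumb-1
   multiplication by a constant: the multiplier is consumed two bits
   per step, so the count is roughly one cycle per pair of significant
   bits.  */
#include <assert.h>

static int
thumb1_mul_cycles (unsigned long long i)
{
  int cycles = 0;

  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return cycles;
}

int
main (void)
{
  assert (thumb1_mul_cycles (255) == 4);      /* 8 significant bits  */
  assert (thumb1_mul_cycles (0x10000) == 9);  /* 17 significant bits */
  return 0;
}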
*/ + words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); + return (COSTS_N_INSNS (words) + + 4 * ((MEM_P (SET_SRC (x))) + + MEM_P (SET_DEST (x)))); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return 0; + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case SIGN_EXTEND: + case ZERO_EXTEND: + total = mode == DImode ? COSTS_N_INSNS (1) : 0; + total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); + + if (mode == SImode) + return total; + + if (arm_arch6) + return total + COSTS_N_INSNS (1); + + /* Assume a two-shift sequence. Increase the cost slightly so + we prefer actual shifts over an extend operation. */ + return total + 1 + COSTS_N_INSNS (2); + + default: + return 99; + } +} + +static inline bool +arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + enum rtx_code subcode; + rtx operand; + enum rtx_code code = GET_CODE (x); + *total = 0; + + switch (code) + { + case MEM: + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (TARGET_HARD_FLOAT && mode == SFmode) + *total = COSTS_N_INSNS (2); + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (20); + return false; + + case ROTATE: + if (REG_P (XEXP (x, 1))) + *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ + else if (!CONST_INT_P (XEXP (x, 1))) + *total = rtx_cost (XEXP (x, 1), code, 1, speed); + + /* Fall through */ + case ROTATERT: + if (mode != SImode) + { + *total += COSTS_N_INSNS (4); + return true; + } + + /* Fall through */ + case ASHIFT: case LSHIFTRT: case ASHIFTRT: + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + if (mode == DImode) + { + *total += COSTS_N_INSNS (3); + return true; + } + + *total += COSTS_N_INSNS (1); + /* Increase the cost of complex shifts because they aren't any faster, + and reduce dual issue opportunities. 
*/ + if (arm_tune_cortex_a9 + && outer != SET && !CONST_INT_P (XEXP (x, 1))) + ++*total; + + return true; + + case MINUS: + if (mode == DImode) + { + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + if (CONST_INT_P (XEXP (x, 0)) + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_arm (INTVAL (XEXP (x, 1)))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (CONST_DOUBLE_P (XEXP (x, 0)) + && arm_const_double_rtx (XEXP (x, 0))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (CONST_DOUBLE_P (XEXP (x, 1)) + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + + *total = COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 0)) + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 1)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); + return true; + } + + /* A shift as a part of RSB costs no more than RSB itself. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed); + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + if (REG_P (XEXP (XEXP (x, 1), 0)) + && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + + return true; + } + + /* Fall through */ + + case PLUS: + if (code == PLUS && arm_arch6 && mode == SImode + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), + 0, speed); + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + /* MLA: All arguments must be registers. We filter out + multiplication by a power of two, so that we fall down into + the code below. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + /* The cost comes from the cost of the multiply. 
*/ + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (CONST_DOUBLE_P (XEXP (x, 1)) + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + *total = COSTS_N_INSNS (20); + return false; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed); + if (REG_P (XEXP (XEXP (x, 0), 0)) + && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + return true; + } + + /* Fall through */ + + case AND: case XOR: case IOR: + + /* Normally the frame registers will be spilt into reg+const during + reload, so it is a bad idea to combine them with other instructions, + since then they might not be moved outside of loops. As a compromise + we allow integration with ops that have a constant as their second + operand. */ + if (REG_OR_SUBREG_REG (XEXP (x, 0)) + && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) + && !CONST_INT_P (XEXP (x, 1))) + *total = COSTS_N_INSNS (1); + + if (mode == DImode) + { + *total += COSTS_N_INSNS (2); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + *total += COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == UMIN || subcode == UMAX + || subcode == SMIN || subcode == SMAX) + { + *total = COSTS_N_INSNS (3); + return true; + } + + return false; + + case MULT: + /* This should have been handled by the CPU specific routines. 
*/ + gcc_unreachable (); + + case TRUNCATE: + if (arm_arch3m && mode == SImode + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) + == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed); + return true; + } + *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ + return false; + + case NEG: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (2); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); + if (mode == SImode && code == NOT) + { + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT + || (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + /* Register shifts cost an extra cycle. */ + if (!CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), + subcode, 1, speed); + return true; + } + } + + return false; + + case IF_THEN_ELSE: + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *total = COSTS_N_INSNS (4); + return true; + } + + operand = XEXP (x, 0); + + if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) + && REG_P (XEXP (operand, 0)) + && REGNO (XEXP (operand, 0)) == CC_REGNUM)) + *total += COSTS_N_INSNS (1); + *total += (rtx_cost (XEXP (x, 1), code, 1, speed) + + rtx_cost (XEXP (x, 2), code, 2, speed)); + return true; + + case NE: + if (mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case GE: + if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case LT: + if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case EQ: + case GT: + case LE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + scc_insn: + /* SCC insns. In the case where the comparison has already been + performed, then they cost 2 instructions. Otherwise they need + an additional comparison before them. 
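/* Illustrative sketch (standalone; not part of arm.c) of the operation
   the TRUNCATE case above recognizes: the high 32 bits of a widening
   32x32 multiply, which ARMv3M and later produce with a single
   smull/umull, so only the multiply itself needs to be costed.  */
#include <assert.h>
#include <stdint.h>

static int32_t
mul_high_signed (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * (int64_t) b) >> 32);
}

int
main (void)
{
  assert (mul_high_signed (1 << 16, 1 << 16) == 1);      /*  2^32 >> 32 */
  assert (mul_high_signed (-(1 << 16), 1 << 16) == -1);  /* -2^32 >> 32 */
  return 0;
}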
*/ + *total = COSTS_N_INSNS (2); + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + return true; + } + + /* Fall through */ + case COMPARE: + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + *total = 0; + return true; + } + + *total += COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + return false; + + case UMIN: + case UMAX: + case SMIN: + case SMAX: + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + if (!CONST_INT_P (XEXP (x, 1)) + || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + + case ABS: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + *total = COSTS_N_INSNS (1); + if (mode == DImode) + *total += COSTS_N_INSNS (3); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = 0; + if (GET_MODE_CLASS (mode) == MODE_INT) + { + rtx op = XEXP (x, 0); + enum machine_mode opmode = GET_MODE (op); + + if (mode == DImode) + *total += COSTS_N_INSNS (1); + + if (opmode != SImode) + { + if (MEM_P (op)) + { + /* If !arm_arch4, we use one of the extendhisi2_mem + or movhi_bytes patterns for HImode. For a QImode + sign extension, we first zero-extend from memory + and then perform a shift sequence. */ + if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) + *total += COSTS_N_INSNS (2); + } + else if (arm_arch6) + *total += COSTS_N_INSNS (1); + + /* We don't have the necessary insn, so we need to perform some + other operation. */ + else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) + /* An and with constant 255. */ + *total += COSTS_N_INSNS (1); + else + /* A shift sequence. Increase costs slightly to avoid + combining two shifts into an extend operation. 
*/ + *total += COSTS_N_INSNS (2) + 1; + } + + return false; + } + + switch (GET_MODE (XEXP (x, 0))) + { + case V8QImode: + case V4HImode: + case V2SImode: + case V4QImode: + case V2HImode: + *total = COSTS_N_INSNS (1); + return false; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x)) + || const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, + INTVAL (x), NULL_RTX, + NULL_RTX, 0, 0)); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (3); + return true; + + case HIGH: + *total = COSTS_N_INSNS (1); + return true; + + case LO_SUM: + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + case SET: + /* The vec_extract patterns accept memory operands that require an + address reload. Account for the cost of that reload to give the + auto-inc-dec pass an incentive to try to replace them. */ + if (TARGET_NEON && MEM_P (SET_DEST (x)) + && GET_CODE (SET_SRC (x)) == VEC_SELECT) + { + *total = rtx_cost (SET_DEST (x), code, 0, speed); + if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) + *total += COSTS_N_INSNS (1); + return true; + } + /* Likewise for the vec_set patterns. */ + if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE + && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE + && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0))) + { + rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); + *total = rtx_cost (mem, code, 0, speed); + if (!neon_vector_mem_operand (mem, 2, true)) + *total += COSTS_N_INSNS (1); + return true; + } + return false; + + case UNSPEC: + /* We cost this as high as our memory costs to allow this to + be hoisted from loops. */ + if (XINT (x, 1) == UNSPEC_PIC_UNIFIED) + { + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + } + return true; + + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + default: + *total = COSTS_N_INSNS (4); + return false; + } +} + +/* Estimates the size cost of thumb1 instructions. + For now most of the code is copied from thumb1_rtx_costs. We need more + fine grain tuning when we have more related test cases. */ +static inline int +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + int words; + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); + + case PLUS: + case MINUS: + /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1 + defined by RTL expansion, especially for the expansion of + multiplication. */ + if ((GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x,0),1), SImode)) + || (GET_CODE (XEXP (x, 1)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))) + return COSTS_N_INSNS (2); + /* On purpose fall through for normal RTX. 
*/ + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (CONST_INT_P (XEXP (x, 1))) + { + /* Thumb1 mul instruction can't operate on const. We must Load it + into a register first. */ + int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); + return COSTS_N_INSNS (1) + const_size; + } + return COSTS_N_INSNS (1); + + case SET: + /* A SET doesn't have a mode, so let's look at the SET_DEST to get + the mode. */ + words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); + return (COSTS_N_INSNS (words) + + 4 * ((MEM_P (SET_SRC (x))) + + MEM_P (SET_DEST (x)))); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return COSTS_N_INSNS (1); + /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ + if (INTVAL (x) >= -255 && INTVAL (x) <= -1) + return COSTS_N_INSNS (2); + /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case ZERO_EXTEND: + /* XXX still guessing. */ + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + return (1 + (mode == DImode ? 4 : 0) + + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + case HImode: + return (4 + (mode == DImode ? 4 : 0) + + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + case SImode: + return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + default: + return 99; + } + + default: + return 99; + } +} + +/* RTX costs when optimizing for size. */ +static bool +arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total) +{ + enum machine_mode mode = GET_MODE (x); + if (TARGET_THUMB1) + { + *total = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ + switch (code) + { + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *total = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. 
*/ + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + /* Needs a libcall, so it costs about this. */ + *total = COSTS_N_INSNS (2); + return false; + + case ROTATE: + if (mode == SImode && REG_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false); + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false); + return true; + } + else if (mode == SImode) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false); + /* Slightly disparage register shifts, but not by much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false); + return true; + } + + /* Needs a libcall. */ + *total = COSTS_N_INSNS (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + if (mode == SImode) + { + enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); + enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); + + if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT + || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT + || subcode1 == ROTATE || subcode1 == ROTATERT + || subcode1 == ASHIFT || subcode1 == LSHIFTRT + || subcode1 == ASHIFTRT) + { + /* It's just the cost of the two operands. */ + *total = 0; + return false; + } + + *total = COSTS_N_INSNS (1); + return false; + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* A shift as a part of ADD costs nothing. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false); + *total += rtx_cost (XEXP (x, 1), code, 1, false); + return true; + } + + /* Fall through */ + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + + if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT + || subcode == LSHIFTRT || subcode == ASHIFTRT + || (code == AND && subcode == NOT)) + { + /* It's just the cost of the two operands. 
*/ + *total = 0; + return false; + } + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case MULT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + return false; + + case IF_THEN_ELSE: + *total = 0; + return false; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode)) + * total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + return arm_rtx_costs_1 (x, outer_code, total, 0); + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x))) + /* A multiplication by a constant requires another instruction + to load the constant to a register. */ + *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) + ? 1 : 0); + else if (const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1); + else if (const_ok_for_arm (-INTVAL (x))) + { + if (outer_code == COMPARE || outer_code == PLUS + || outer_code == MINUS) + *total = 0; + else + *total = COSTS_N_INSNS (1); + } + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (2); + return true; + + case CONST_DOUBLE: + *total = COSTS_N_INSNS (4); + return true; + + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer_code == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the + cost of these slightly. */ + *total = COSTS_N_INSNS (1) + 1; + return true; + + case SET: + return false; + + default: + if (mode != VOIDmode) + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *total = COSTS_N_INSNS (4); /* How knows? */ + return false; + } +} + +/* Helper function for arm_rtx_costs. If the operand is a valid shift + operand, then return the operand that is being shifted. If the shift + is not by a constant, then set SHIFT_REG to point to the operand. + Return NULL if OP is not a shifter operand. */ +static rtx +shifter_op_p (rtx op, rtx *shift_reg) +{ + enum rtx_code code = GET_CODE (op); + + if (code == MULT && CONST_INT_P (XEXP (op, 1)) + && exact_log2 (INTVAL (XEXP (op, 1))) > 0) + return XEXP (op, 0); + else if (code == ROTATE && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); + else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT + || code == ASHIFTRT) + { + if (!CONST_INT_P (XEXP (op, 1))) + *shift_reg = XEXP (op, 1); + return XEXP (op, 0); + } + + return NULL; +} + +static bool +arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) +{ + const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; + gcc_assert (GET_CODE (x) == UNSPEC); + + switch (XINT (x, 1)) + { + case UNSPEC_UNALIGNED_LOAD: + /* We can only do unaligned loads into the integer unit, and we can't + use LDM or LDRD. 
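/* Illustrative sketch (standalone; not part of arm.c, name is
   hypothetical) of the idea behind shifter_op_p above: a multiply by a
   power of two greater than one folds into the shifter operand of an
   ARM data-processing instruction, and the shift amount is the base-2
   logarithm of the constant.  */
#include <assert.h>

static int
power_of_two_shift (unsigned long c)
{
  /* Return the shift amount, or -1 if C is not a power of two > 1.  */
  if (c < 2 || (c & (c - 1)) != 0)
    return -1;
  return __builtin_ctzl (c);
}

int
main (void)
{
  assert (power_of_two_shift (8) == 3);   /* x * 8  ->  x << 3  */
  assert (power_of_two_shift (6) == -1);  /* needs a real multiply  */
  return 0;
}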
*/ + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load + + extra_cost->ldst.load_unaligned); + +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_UNALIGNED_STORE: + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store + + extra_cost->ldst.store_unaligned); + + *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p); +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_VRINTZ: + case UNSPEC_VRINTP: + case UNSPEC_VRINTM: + case UNSPEC_VRINTR: + case UNSPEC_VRINTX: + case UNSPEC_VRINTA: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint; + + return true; + default: + *cost = COSTS_N_INSNS (2); + break; + } + return false; +} + +/* Cost of a libcall. We assume one insn per argument, an amount for the + call (one insn for -Os) and then one for processing the result. */ +#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) + +#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \ + do \ + { \ + shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \ + if (shift_op != NULL \ + && arm_rtx_shift_left_p (XEXP (x, IDX))) \ + { \ + if (shift_reg) \ + { \ + if (speed_p) \ + *cost += extra_cost->alu.arith_shift_reg; \ + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \ + } \ + else if (speed_p) \ + *cost += extra_cost->alu.arith_shift; \ + \ + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \ + + rtx_cost (XEXP (x, 1 - IDX), \ + OP, 1, speed_p)); \ + return true; \ + } \ + } \ + while (0); + +/* RTX costs. Make an estimate of the cost of executing the operation + X, which is contained with an operation with code OUTER_CODE. + SPEED_P indicates whether the cost desired is the performance cost, + or the size cost. The estimate is stored in COST and the return + value is TRUE if the cost calculation is final, or FALSE if the + caller should recurse through the operands of X to add additional + costs. + + We currently make no attempt to model the size savings of Thumb-2 + 16-bit instructions. At the normal points in compilation where + this code is called we have no measure of whether the condition + flags are live or not, and thus no realistic way to determine what + the size will eventually be. */ +static bool +arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + const struct cpu_cost_table *extra_cost, + int *cost, bool speed_p) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + if (speed_p) + *cost = thumb1_rtx_costs (x, code, outer_code); + else + *cost = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case SET: + *cost = 0; + /* SET RTXs don't have a mode so we get it from the destination. */ + mode = GET_MODE (SET_DEST (x)); + + if (REG_P (SET_SRC (x)) + && REG_P (SET_DEST (x))) + { + /* Assume that most copies can be done with a single insn, + unless we don't have HW FP, in which case everything + larger than word mode will require two insns. */ + *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT + && GET_MODE_SIZE (mode) > 4) + || mode == DImode) + ? 2 : 1); + /* Conditional register moves can be encoded + in 16 bits in Thumb mode. 
*/ + if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC) + *cost >>= 1; + + return true; + } + + if (CONST_INT_P (SET_SRC (x))) + { + /* Handle CONST_INT here, since the value doesn't have a mode + and we would otherwise be unable to work out the true cost. */ + *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p); + outer_code = SET; + /* Slightly lower the cost of setting a core reg to a constant. + This helps break up chains and allows for better scheduling. */ + if (REG_P (SET_DEST (x)) + && REGNO (SET_DEST (x)) <= LR_REGNUM) + *cost -= 1; + x = SET_SRC (x); + /* Immediate moves with an immediate in the range [0, 255] can be + encoded in 16 bits in Thumb mode. */ + if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode + && INTVAL (x) >= 0 && INTVAL (x) <=255) + *cost >>= 1; + goto const_int_cost; + } + + return false; + + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *cost = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. */ + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + /* For speed optimizations, add the costs of the address and + accessing memory. */ + if (speed_p) +#ifdef NOT_YET + *cost += (extra_cost->ldst.load + + arm_address_cost (XEXP (x, 0), mode, + ADDR_SPACE_GENERIC, speed_p)); +#else + *cost += extra_cost->ldst.load; +#endif + return true; + + case PARALLEL: + { + /* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. The + formula for N regs is thus: + + ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0) + + ldm_regs_per_insn_subsequent - 1) + / ldm_regs_per_insn_subsequent). + + Additional costs may also be added for addressing. A similar + formula is used for STM. */ + + bool is_ldm = load_multiple_operation (x, SImode); + bool is_stm = store_multiple_operation (x, SImode); + + *cost = COSTS_N_INSNS (1); + + if (is_ldm || is_stm) + { + if (speed_p) + { + HOST_WIDE_INT nregs = XVECLEN (x, 0); + HOST_WIDE_INT regs_per_insn_1st = is_ldm + ? extra_cost->ldst.ldm_regs_per_insn_1st + : extra_cost->ldst.stm_regs_per_insn_1st; + HOST_WIDE_INT regs_per_insn_sub = is_ldm + ? extra_cost->ldst.ldm_regs_per_insn_subsequent + : extra_cost->ldst.stm_regs_per_insn_subsequent; + + *cost += regs_per_insn_1st + + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0)) + + regs_per_insn_sub - 1) + / regs_per_insn_sub); + return true; + } + + } + return false; + } + case DIV: + case UDIV: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *cost = COSTS_N_INSNS (speed_p + ? extra_cost->fp[mode != SFmode].div : 1); + else if (mode == SImode && TARGET_IDIV) + *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1); + else + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. */ + + case MOD: + case UMOD: + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. 
*/ + + case ROTATE: + if (mode == SImode && REG_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += extra_cost->alu.shift_reg; + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && CONST_INT_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (3) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + return true; + } + else if (mode == SImode) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + if (code == ASHIFT) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by + much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + } + else if (code == LSHIFTRT || code == ASHIFTRT) + { + if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1))) + { + /* Can use SBFX/UBFX. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + } + else + { + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += 2 * extra_cost->alu.shift; + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg); + } + else + /* Slightly disparage register shifts. */ + *cost += !CONST_INT_P (XEXP (x, 1)); + } + } + else /* Rotates. */ + { + *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1))); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += (2 * extra_cost->alu.shift + + extra_cost->alu.log_shift); + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg + + extra_cost->alu.log_shift_reg); + } + } + return true; + } + + *cost = LIBCALL_COST (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 1)) == MULT) + { + rtx mul_op0, mul_op1, sub_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + if (GET_CODE (XEXP (x, 0)) == MULT) + { + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + sub_op = XEXP (x, 1); + } + else + { + mul_op0 = XEXP (XEXP (x, 1), 0); + mul_op1 = XEXP (XEXP (x, 1), 1); + sub_op = XEXP (x, 0); + } + + /* The first operand of the multiply may be optionally + negated. 
*/ + if (GET_CODE (mul_op0) == NEG) + mul_op0 = XEXP (mul_op0, 0); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (sub_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + + if (mode == SImode) + { + rtx shift_by_reg = NULL; + rtx shift_op; + rtx non_shift_op; + + *cost = COSTS_N_INSNS (1); + + shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg); + if (shift_op == NULL) + { + shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg); + non_shift_op = XEXP (x, 0); + } + else + non_shift_op = XEXP (x, 1); + + if (shift_op != NULL) + { + if (shift_by_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_by_reg, code, 0, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, code, 0, speed_p) + + rtx_cost (non_shift_op, code, 0, speed_p)); + return true; + } + + if (arm_arch_thumb2 + && GET_CODE (XEXP (x, 1)) == MULT) + { + /* MLS. */ + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 0))) + { + int insns = arm_gen_constant (MINUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 0)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + rtx shift_op, shift_reg; + shift_reg = NULL; + + /* We check both sides of the MINUS for shifter operands since, + unlike PLUS, it's not commutative. */ + + HANDLE_NARROW_SHIFT_ARITH (MINUS, 0) + HANDLE_NARROW_SHIFT_ARITH (MINUS, 1) + + /* Slightly disparage, as we might need to widen the result. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + if (CONST_INT_P (XEXP (x, 0))) + { + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + { + rtx op1 = XEXP (x, 1); + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + + if (GET_CODE (op1) == ZERO_EXTEND) + *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p); + else + *cost += rtx_cost (op1, MINUS, 1, speed_p); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, + 0, speed_p); + return true; + } + else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND, + 0, speed_p) + + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p)); + return true; + } + else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + GET_CODE (XEXP (x, 1)), 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? 
*/ + + *cost = LIBCALL_COST (2); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op0, mul_op1, add_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + add_op = XEXP (x, 1); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (add_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + /* Narrow modes can be synthesized in SImode, but the range + of useful sub-operations is limited. Check for shift operations + on one of the operands. Only left shifts can be used in the + narrow modes. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + rtx shift_op, shift_reg; + shift_reg = NULL; + + HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) + + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + return false; + } + + if (mode == SImode) + { + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + if (TARGET_INT_SIMD + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + /* UXTA[BH] or SXTA[BH]. */ + if (speed_p) + *cost += extra_cost->alu.extend_arith; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p)); + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16))) + || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1)) + && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16 + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && (INTVAL (XEXP (XEXP (mul_op, 1), 1)) + == 16)))))) + { + /* SMLA[BT][BT]. 
*/ + if (speed_p) + *cost += extra_cost->mult[0].extend_add; + *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (mul_op, 1), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p) + + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && GET_CODE (XEXP (x, 0)) == MULT + && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend_add; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? 
*/ + *cost = LIBCALL_COST (2); + return false; + + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + rtx op0 = XEXP (x, 0); + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + shift_reg = NULL; + shift_op = shifter_op_p (op0, &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (code, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.logical; + *cost += rtx_cost (op0, code, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + *cost += (rtx_cost (op0, code, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (mode == DImode) + { + rtx op0 = XEXP (x, 0); + enum rtx_code subcode = GET_CODE (op0); + + *cost = COSTS_N_INSNS (2); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + if (GET_CODE (op0) == ZERO_EXTEND) + { + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + else if (GET_CODE (op0) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.logical + extra_cost->alu.log_shift; + + *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + return true; + } + /* Vector mode? */ + + *cost = LIBCALL_COST (2); + return false; + + case MULT: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + rtx op0 = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult; + + *cost += (rtx_cost (op0, MULT, 0, speed_p) + + rtx_cost (XEXP (x, 1), MULT, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && INTVAL (XEXP (XEXP (x, 1), 1)) == 16))) + || (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 16 + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && (INTVAL (XEXP (XEXP (x, 1), 1)) + == 16)))))) + { + /* SMUL[TB][TB]. 
*/ + if (speed_p) + *cost += extra_cost->mult[0].extend; + *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p)); + return true; + } + if (speed_p) + *cost += extra_cost->mult[0].simple; + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + ZERO_EXTEND, 0, speed_p)); + return true; + } + + *cost = LIBCALL_COST (2); + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (2); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + if (GET_CODE (XEXP (x, 0)) == ABS) + { + *cost = COSTS_N_INSNS (2); + /* Assume the non-flag-changing variant. */ + if (speed_p) + *cost += (extra_cost->alu.log_shift + + extra_cost->alu.arith_shift); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *cost = COSTS_N_INSNS (2); + /* No extra cost for MOV imm and MVN imm. */ + /* If the comparison op is using the flags, there's no further + cost, otherwise we need to add the cost of the comparison. */ + if (!(REG_P (XEXP (XEXP (x, 0), 0)) + && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM + && XEXP (XEXP (x, 0), 1) == const0_rtx)) + { + *cost += (COSTS_N_INSNS (1) + + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0, + speed_p) + + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1, + speed_p)); + if (speed_p) + *cost += extra_cost->alu.arith; + } + return true; + } + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + /* Slightly disparage, as we might need an extend operation. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case NOT: + if (mode == SImode) + { + rtx shift_op; + rtx shift_reg = NULL; + + *cost = COSTS_N_INSNS (1); + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + + if (shift_op) + { + if (shift_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + return false; + } + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + return false; + } + + /* Vector mode? 
*/ + + *cost += LIBCALL_COST (1); + return false; + + case IF_THEN_ELSE: + { + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *cost = COSTS_N_INSNS (4); + return true; + } + int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p); + int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p); + + *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p); + /* Assume that if one arm of the if_then_else is a register, + that it will be tied with the result and eliminate the + conditional insn. */ + if (REG_P (XEXP (x, 1))) + *cost += op2cost; + else if (REG_P (XEXP (x, 2))) + *cost += op1cost; + else + { + if (speed_p) + { + if (extra_cost->alu.non_exec_costs_exec) + *cost += op1cost + op2cost + extra_cost->alu.non_exec; + else + *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec; + } + else + *cost += op1cost + op2cost; + } + } + return true; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx) + *cost = 0; + else + { + enum machine_mode op0mode; + /* We'll mostly assume that the cost of a compare is the cost of the + LHS. However, there are some notable exceptions. */ + + /* Floating point compares are never done as side-effects. */ + op0mode = GET_MODE (XEXP (x, 0)); + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT + && (op0mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[op0mode != SFmode].compare; + + if (XEXP (x, 1) == CONST0_RTX (op0mode)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + /* DImode compares normally take two insns. */ + if (op0mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + if (op0mode == SImode) + { + rtx shift_op; + rtx shift_reg; + + if (XEXP (x, 1) == const0_rtx + && !(REG_P (XEXP (x, 0)) + || (GET_CODE (XEXP (x, 0)) == SUBREG + && REG_P (SUBREG_REG (XEXP (x, 0)))))) + { + *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + + /* Multiply operations that set the flags are often + significantly more expensive. */ + if (speed_p + && GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + + if (speed_p + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0), + 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + *cost = COSTS_N_INSNS (1); + if (shift_reg != NULL) + { + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + return true; + } + return false; + } + + /* Vector mode? 
*/ + + *cost = LIBCALL_COST (2); + return false; + } + return true; + + case EQ: + case NE: + case LT: + case LE: + case GT: + case GE: + case LTU: + case LEU: + case GEU: + case GTU: + case ORDERED: + case UNORDERED: + case UNEQ: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + case LTGT: + if (outer_code == SET) + { + /* Is it a store-flag operation? */ + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + /* Thumb also needs an IT insn. */ + *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2); + return true; + } + if (XEXP (x, 1) == const0_rtx) + { + switch (code) + { + case LT: + /* LSR Rd, Rn, #31. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + case EQ: + /* RSBS T1, Rn, #0 + ADC Rd, Rn, T1. */ + + case NE: + /* SUBS T1, Rn, #1 + SBC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + break; + + case LE: + /* RSBS T1, Rn, Rn, LSR #31 + ADC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.arith_shift; + break; + + case GT: + /* RSB Rd, Rn, Rn, ASR #1 + LSR Rd, Rd, #31. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += (extra_cost->alu.arith_shift + + extra_cost->alu.shift); + break; + + case GE: + /* ASR Rd, Rn, #31 + ADD Rd, Rn, #1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + default: + /* Remaining cases are either meaningless or would take + three insns anyway. */ + *cost = COSTS_N_INSNS (3); + break; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + else + { + *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + } + /* Not directly inside a set. If it involves the condition code + register it must be the condition for a branch, cond_exec or + I_T_E operation. Since the comparison is performed elsewhere + this is just the control part which has no additional + cost. */ + else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + *cost = 0; + return true; + } + return false; + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift; + return false; + } + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case SIGN_EXTEND: + if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); + + if (!speed_p) + return true; + + if (GET_MODE (XEXP (x, 0)) == SImode) + *cost += extra_cost->ldst.load; + else + *cost += extra_cost->ldst.load_sign_extend; + + if (mode == DImode) + *cost += extra_cost->alu.shift; + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have SXTB/SXTH. 
*/ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extend; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + } + + return true; + + case ZERO_EXTEND: + if ((arm_arch4 + || GET_MODE (XEXP (x, 0)) == SImode + || GET_MODE (XEXP (x, 0)) == QImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) == QImode) + { + /* UXTB can be a shorter instruction in Thumb2, but it might + be slower than the AND Rd, Rn, #255 alternative. When + optimizing for speed it should never be slower to use + AND, and we don't really model 16-bit vs 32-bit insns + here. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.logical; + } + else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have UXTB/UXTH. */ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extend; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. It's marginally preferable to use + shifts rather than two BIC instructions as the second + shift may merge with a subsequent insn as a shifter + op. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + else /* GET_MODE (XEXP (x, 0)) == SImode. */ + *cost = COSTS_N_INSNS (1); + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + } + + return true; + + case CONST_INT: + *cost = 0; + /* CONST_INT has no mode, so we cannot tell for sure how many + insns are really going to be needed. The best we can do is + look at the value passed. If it fits in SImode, then assume + that's the mode it will be used for. Otherwise assume it + will be used in DImode. */ + if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode)) + mode = SImode; + else + mode = DImode; + + /* Avoid blowing up in arm_gen_constant (). */ + if (!(outer_code == PLUS + || outer_code == AND + || outer_code == IOR + || outer_code == XOR + || outer_code == MINUS)) + outer_code = SET; + + const_int_cost: + if (mode == SImode) + { + *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x), NULL, NULL, + 0, 0)); + /* Extra costs? */ + } + else + { + *cost += COSTS_N_INSNS (arm_gen_constant + (outer_code, SImode, NULL, + trunc_int_for_mode (INTVAL (x), SImode), + NULL, NULL, 0, 0) + + arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x) >> 32, NULL, + NULL, 0, 0)); + /* Extra costs? 
*/ + } + + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (speed_p) + { + if (arm_arch_thumb2 && !flag_pic) + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load; + } + else + *cost = COSTS_N_INSNS (2); + + if (flag_pic) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + } + + return true; + + case CONST_FIXED: + *cost = COSTS_N_INSNS (4); + /* Fixme. */ + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + if (vfp3_const_double_rtx (x)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fpconst; + return true; + } + + if (speed_p) + { + *cost = COSTS_N_INSNS (1); + if (mode == DFmode) + *cost += extra_cost->ldst.loadd; + else + *cost += extra_cost->ldst.loadf; + } + else + *cost = COSTS_N_INSNS (2 + (mode == DFmode)); + + return true; + } + *cost = COSTS_N_INSNS (4); + return true; + + case CONST_VECTOR: + /* Fixme. */ + if (TARGET_NEON + && TARGET_HARD_FLOAT + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *cost = COSTS_N_INSNS (1); + else + *cost = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + *cost = COSTS_N_INSNS (1); + /* When optimizing for size, we prefer constant pool entries to + MOVW/MOVT pairs, so bump the cost of these slightly. */ + if (!speed_p) + *cost += 1; + return true; + + case CLZ: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.clz; + return false; + + case SMIN: + if (XEXP (x, 1) == const0_rtx) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Fall through. */ + case SMAX: + case UMIN: + case UMAX: + *cost = COSTS_N_INSNS (2); + return false; + + case TRUNCATE: + if (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) + || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND, + 0, speed_p)); + return true; + } + *cost = LIBCALL_COST (1); + return false; + + case UNSPEC: + return arm_unspec_cost (x, outer_code, speed_p, cost); + + case PC: + /* Reading the PC is like reading any other register. Writing it + is more expensive, but we take that into account elsewhere. */ + *cost = 0; + return true; + + case ZERO_EXTRACT: + /* TODO: Simple zero_extract of bottom bits using AND. */ + /* Fall through. */ + case SIGN_EXTRACT: + if (arm_arch6 + && mode == SImode + && CONST_INT_P (XEXP (x, 1)) + && CONST_INT_P (XEXP (x, 2))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Without UBFX/SBFX, need to resort to shift operations. 
*/ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p); + return true; + + case FLOAT_EXTEND: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].widen; + if (!TARGET_FPU_ARMV8 + && GET_MODE (XEXP (x, 0)) == HFmode) + { + /* Pre v8, widening HF->DF is a two-step process, first + widening to SFmode. */ + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[0].widen; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + *cost = LIBCALL_COST (1); + return false; + + case FLOAT_TRUNCATE: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].narrow; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + /* Vector modes? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FIX: + case UNSIGNED_FIX: + if (TARGET_HARD_FLOAT) + { + if (GET_MODE_CLASS (mode) == MODE_INT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint; + /* Strip of the 'cost' of rounding towards zero. */ + if (GET_CODE (XEXP (x, 0)) == FIX) + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p); + else + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + /* ??? Increase the cost to deal with transferring from + FP -> CORE registers? */ + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + && TARGET_FPU_ARMV8) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].roundint; + return false; + } + /* Vector costs? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FLOAT: + case UNSIGNED_FLOAT: + if (TARGET_HARD_FLOAT) + { + /* ??? Increase the cost to deal with transferring from CORE + -> FP registers? */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fromint; + return false; + } + *cost = LIBCALL_COST (1); + return false; + + case CALL: + *cost = COSTS_N_INSNS (1); + return true; + + case ASM_OPERANDS: + /* Just a guess. Cost one insn per input. */ + *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); + return true; + + default: + if (mode != VOIDmode) + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *cost = COSTS_N_INSNS (4); /* Who knows? */ + return false; + } +} + +#undef HANDLE_NARROW_SHIFT_ARITH + +/* RTX costs when optimizing for size. */ +static bool +arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ + bool result; + + if (TARGET_OLD_RTX_COSTS + || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS)) + { + /* Old way. (Deprecated.) */ + if (!speed) + result = arm_size_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total); + else + result = current_tune->rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total, + speed); + } + else + { + /* New way. 
*/ + if (current_tune->insn_extra_cost) + result = arm_new_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + current_tune->insn_extra_cost, + total, speed); + /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS + && current_tune->insn_extra_cost != NULL */ + else + result = arm_new_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + &generic_extra_costs, total, speed); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + print_rtl_single (dump_file, x); + fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", + *total, result ? "final" : "partial"); + } + return result; +} + +/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not + supported on any "slowmul" cores, so it can be ignored. */ + +static bool +arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case MULT: + if (GET_MODE_CLASS (mode) == MODE_FLOAT + || mode == DImode) + { + *total = COSTS_N_INSNS (20); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 4 : 8; + booth_unit_size = 2; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS (cost); + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed);; + } +} + + +/* RTX cost for cores with a fast multiply unit (M variants). */ + +static bool +arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + /* ??? should thumb2 use different costs? */ + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS(2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 4 : 8; + booth_unit_size = 8; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS(cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (4); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX cost for XScale CPUs. 
Thumb-2 is not supported on any xscale cores, + so it can be ignored. */ + +static bool +arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case COMPARE: + if (GET_CODE (XEXP (x, 0)) != MULT) + return arm_rtx_costs_1 (x, outer_code, total, speed); + + /* A COMPARE of a MULT is slow on XScale; the muls instruction + will stall until the multiplication is complete. */ + *total = COSTS_N_INSNS (3); + return false; + + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + /* If operand 1 is a constant we can more accurately + calculate the cost of the multiply. The multiplier can + retire 15 bits on the first cycle and a further 12 on the + second. We do, of course, have to load the constant into + a register first. */ + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + /* There's a general overhead of one cycle. */ + int cost = 1; + unsigned HOST_WIDE_INT masked_const; + + if (i & 0x80000000) + i = ~i; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + masked_const = i & 0xffff8000; + if (masked_const != 0) + { + cost++; + masked_const = i & 0xf8000000; + if (masked_const != 0) + cost++; + } + *total = COSTS_N_INSNS (cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (3); + return false; + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX costs for 9e (and later) cores. */ + +static bool +arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + switch (code) + { + case MULT: + *total = COSTS_N_INSNS (3); + return true; + + default: + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + } + + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (2); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} +/* All address computations that can be done are free, but rtx cost returns + the same for practically all of them. So we weight the different types + of address here in the order (most pref first): + PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. 
*/
+static inline int
+arm_arm_address_cost (rtx x)
+{
+  enum rtx_code c = GET_CODE (x);
+
+  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
+    return 0;
+  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
+    return 10;
+
+  if (c == PLUS)
+    {
+      if (CONST_INT_P (XEXP (x, 1)))
+        return 2;
+
+      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
+        return 3;
+
+      return 4;
+    }
+
+  return 6;
+}
+
+static inline int
+arm_thumb_address_cost (rtx x)
+{
+  enum rtx_code c = GET_CODE (x);
+
+  if (c == REG)
+    return 1;
+  if (c == PLUS
+      && REG_P (XEXP (x, 0))
+      && CONST_INT_P (XEXP (x, 1)))
+    return 1;
+
+  return 2;
+}
+
+static int
+arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
+                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
+{
+  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
+}
+
+/* Adjust cost hook for XScale.  */
+static bool
+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  /* Some true dependencies can have a higher cost depending
+     on precisely how certain input operands are used.  */
+  if (REG_NOTE_KIND(link) == 0
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0)
+    {
+      int shift_opnum = get_attr_shift (insn);
+      enum attr_type attr_type = get_attr_type (dep);
+
+      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+         operand for INSN.  If we have a shifted input operand and the
+         instruction we depend on is another ALU instruction, then we may
+         have to account for an additional stall.  */
+      if (shift_opnum != 0
+          && (attr_type == TYPE_ALU_SHIFT_IMM
+              || attr_type == TYPE_ALUS_SHIFT_IMM
+              || attr_type == TYPE_LOGIC_SHIFT_IMM
+              || attr_type == TYPE_LOGICS_SHIFT_IMM
+              || attr_type == TYPE_ALU_SHIFT_REG
+              || attr_type == TYPE_ALUS_SHIFT_REG
+              || attr_type == TYPE_LOGIC_SHIFT_REG
+              || attr_type == TYPE_LOGICS_SHIFT_REG
+              || attr_type == TYPE_MOV_SHIFT
+              || attr_type == TYPE_MVN_SHIFT
+              || attr_type == TYPE_MOV_SHIFT_REG
+              || attr_type == TYPE_MVN_SHIFT_REG))
+        {
+          rtx shifted_operand;
+          int opno;
+
+          /* Get the shifted operand.  */
+          extract_insn (insn);
+          shifted_operand = recog_data.operand[shift_opnum];
+
+          /* Iterate over all the operands in DEP.  If we write an operand
+             that overlaps with SHIFTED_OPERAND, then we have to increase
+             the cost of this dependency.  */
+          extract_insn (dep);
+          preprocess_constraints ();
+          for (opno = 0; opno < recog_data.n_operands; opno++)
+            {
+              /* We can ignore strict inputs.  */
+              if (recog_data.operand_type[opno] == OP_IN)
+                continue;
+
+              if (reg_overlap_mentioned_p (recog_data.operand[opno],
+                                           shifted_operand))
+                {
+                  *cost = 2;
+                  return false;
+                }
+            }
+        }
+    }
+  return true;
+}
+
+/* Adjust cost hook for Cortex A9.  */
+static bool
+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  switch (REG_NOTE_KIND (link))
+    {
+    case REG_DEP_ANTI:
+      *cost = 0;
+      return false;
+
+    case REG_DEP_TRUE:
+    case REG_DEP_OUTPUT:
+      if (recog_memoized (insn) >= 0
+          && recog_memoized (dep) >= 0)
+        {
+          if (GET_CODE (PATTERN (insn)) == SET)
+            {
+              if (GET_MODE_CLASS
+                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
+                  || GET_MODE_CLASS
+                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
+                {
+                  enum attr_type attr_type_insn = get_attr_type (insn);
+                  enum attr_type attr_type_dep = get_attr_type (dep);
+
+                  /* By default all dependencies of the form
+                     s0 = s0 <op> s1
+                     s0 = s0 <op> s2
+                     have an extra latency of 1 cycle because
+                     of the input and output dependency in this
+                     case.  However, this gets modeled as a true
+                     dependency and hence all these checks.  */
+                  if (REG_P (SET_DEST (PATTERN (insn)))
+                      && REG_P (SET_DEST (PATTERN (dep)))
+                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
+                                                  SET_DEST (PATTERN (dep))))
+                    {
+                      /* FMACS is a special case where the dependent
+                         instruction can be issued 3 cycles before
+                         the normal latency in case of an output
+                         dependency.  */
+                      if ((attr_type_insn == TYPE_FMACS
+                           || attr_type_insn == TYPE_FMACD)
+                          && (attr_type_dep == TYPE_FMACS
+                              || attr_type_dep == TYPE_FMACD))
+                        {
+                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+                            *cost = insn_default_latency (dep) - 3;
+                          else
+                            *cost = insn_default_latency (dep);
+                          return false;
+                        }
+                      else
+                        {
+                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+                            *cost = insn_default_latency (dep) + 1;
+                          else
+                            *cost = insn_default_latency (dep);
+                        }
+                      return false;
+                    }
+                }
+            }
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return true;
+}
+
+/* Adjust cost hook for FA726TE.  */
+static bool
+fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
+     predicated) has a penalty of 3.  */
+  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0
+      && get_attr_conds (dep) == CONDS_SET)
+    {
+      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
+      if (get_attr_conds (insn) == CONDS_USE
+          && get_attr_type (insn) != TYPE_BRANCH)
+        {
+          *cost = 3;
+          return false;
+        }
+
+      if (GET_CODE (PATTERN (insn)) == COND_EXEC
+          || get_attr_conds (insn) == CONDS_USE)
+        {
+          *cost = 0;
+          return false;
+        }
+    }
+
+  return true;
+}
+
+/* Implement TARGET_REGISTER_MOVE_COST.
+
+   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
+   are typically more expensive than a single memory access.  We set
+   the cost to less than two memory accesses so that floating
+   point to integer conversion does not go through memory.  */
+
+int
+arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+                        reg_class_t from, reg_class_t to)
+{
+  if (TARGET_32BIT)
+    {
+      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
+          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
+        return 15;
+      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
+               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
+        return 4;
+      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
+        return 20;
+      else
+        return 2;
+    }
+  else
+    {
+      if (from == HI_REGS || to == HI_REGS)
+        return 4;
+      else
+        return 2;
+    }
+}
+
+/* Implement TARGET_MEMORY_MOVE_COST.  */
+
+int
+arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+                      bool in ATTRIBUTE_UNUSED)
+{
+  if (TARGET_32BIT)
+    return 10;
+  else
+    {
+      if (GET_MODE_SIZE (mode) < 4)
+        return 8;
+      else
+        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
+    }
+}
+
+/* Vectorizer cost model implementation.  */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.
*/ +static int +arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return current_tune->vec_costs->scalar_stmt_cost; + + case scalar_load: + return current_tune->vec_costs->scalar_load_cost; + + case scalar_store: + return current_tune->vec_costs->scalar_store_cost; + + case vector_stmt: + return current_tune->vec_costs->vec_stmt_cost; + + case vector_load: + return current_tune->vec_costs->vec_align_load_cost; + + case vector_store: + return current_tune->vec_costs->vec_store_cost; + + case vec_to_scalar: + return current_tune->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return current_tune->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return current_tune->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return current_tune->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return current_tune->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return current_tune->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return current_tune->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ + +static unsigned +arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} + +/* Return true if and only if this insn can dual-issue only as older. */ +static bool +cortexa7_older_only (rtx insn) +{ + if (recog_memoized (insn) < 0) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_ALU_REG: + case TYPE_ALUS_REG: + case TYPE_LOGIC_REG: + case TYPE_LOGICS_REG: + case TYPE_ADC_REG: + case TYPE_ADCS_REG: + case TYPE_ADR: + case TYPE_BFM: + case TYPE_REV: + case TYPE_MVN_REG: + case TYPE_SHIFT_IMM: + case TYPE_SHIFT_REG: + case TYPE_LOAD_BYTE: + case TYPE_LOAD1: + case TYPE_STORE1: + case TYPE_FFARITHS: + case TYPE_FADDS: + case TYPE_FFARITHD: + case TYPE_FADDD: + case TYPE_FMOV: + case TYPE_F_CVT: + case TYPE_FCMPS: + case TYPE_FCMPD: + case TYPE_FCONSTS: + case TYPE_FCONSTD: + case TYPE_FMULS: + case TYPE_FMACS: + case TYPE_FMULD: + case TYPE_FMACD: + case TYPE_FDIVS: + case TYPE_FDIVD: + case TYPE_F_MRC: + case TYPE_F_MRRC: + case TYPE_F_FLAG: + case TYPE_F_LOADS: + case TYPE_F_STORES: + return true; + default: + return false; + } +} + +/* Return true if and only if this insn can dual-issue as younger. 
*/ +static bool +cortexa7_younger (FILE *file, int verbose, rtx insn) +{ + if (recog_memoized (insn) < 0) + { + if (verbose > 5) + fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); + return false; + } + + switch (get_attr_type (insn)) + { + case TYPE_ALU_IMM: + case TYPE_ALUS_IMM: + case TYPE_LOGIC_IMM: + case TYPE_LOGICS_IMM: + case TYPE_EXTEND: + case TYPE_MVN_IMM: + case TYPE_MOV_IMM: + case TYPE_MOV_REG: + case TYPE_MOV_SHIFT: + case TYPE_MOV_SHIFT_REG: + case TYPE_BRANCH: + case TYPE_CALL: + return true; + default: + return false; + } +} + + +/* Look for an instruction that can dual issue only as an older + instruction, and move it in front of any instructions that can + dual-issue as younger, while preserving the relative order of all + other instructions in the ready list. This is a hueuristic to help + dual-issue in later cycles, by postponing issue of more flexible + instructions. This heuristic may affect dual issue opportunities + in the current cycle. */ +static void +cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + int i; + int first_older_only = -1, first_younger = -1; + + if (verbose > 5) + fprintf (file, + ";; sched_reorder for cycle %d with %d insns in ready list\n", + clock, + *n_readyp); + + /* Traverse the ready list from the head (the instruction to issue + first), and looking for the first instruction that can issue as + younger and the first instruction that can dual-issue only as + older. */ + for (i = *n_readyp - 1; i >= 0; i--) + { + rtx insn = ready[i]; + if (cortexa7_older_only (insn)) + { + first_older_only = i; + if (verbose > 5) + fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); + break; + } + else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) + first_younger = i; + } + + /* Nothing to reorder because either no younger insn found or insn + that can dual-issue only as older appears before any insn that + can dual-issue as younger. */ + if (first_younger == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); + return; + } + + /* Nothing to reorder because no older-only insn in the ready list. */ + if (first_older_only == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); + return; + } + + /* Move first_older_only insn before first_younger. */ + if (verbose > 5) + fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", + INSN_UID(ready [first_older_only]), + INSN_UID(ready [first_younger])); + rtx first_older_only_insn = ready [first_older_only]; + for (i = first_older_only; i < first_younger; i++) + { + ready[i] = ready[i+1]; + } + + ready[i] = first_older_only_insn; + return; +} + +/* Implement TARGET_SCHED_REORDER. */ +static int +arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + switch (arm_tune) + { + case cortexa7: + cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); + break; + default: + /* Do nothing for other cores. */ + break; + } + + return arm_issue_rate (); +} + +/* This function implements the target macro TARGET_SCHED_ADJUST_COST. + It corrects the value of COST based on the relationship between + INSN and DEP through the dependence LINK. It returns the new + value. There is a per-core adjust_cost hook to adjust scheduler costs + and the per-core hook can choose to completely override the generic + adjust_cost function. 
Only put bits of code into arm_adjust_cost that + are common across all cores. */ +static int +arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) +{ + rtx i_pat, d_pat; + + /* When generating Thumb-1 code, we want to place flag-setting operations + close to a conditional branch which depends on them, so that we can + omit the comparison. */ + if (TARGET_THUMB1 + && REG_NOTE_KIND (link) == 0 + && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn + && recog_memoized (dep) >= 0 + && get_attr_conds (dep) == CONDS_SET) + return 0; + + if (current_tune->sched_adjust_cost != NULL) + { + if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) + return cost; + } + + /* XXX Is this strictly true? */ + if (REG_NOTE_KIND (link) == REG_DEP_ANTI + || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + return 0; + + /* Call insns don't incur a stall, even if they follow a load. */ + if (REG_NOTE_KIND (link) == 0 + && CALL_P (insn)) + return 1; + + if ((i_pat = single_set (insn)) != NULL + && MEM_P (SET_SRC (i_pat)) + && (d_pat = single_set (dep)) != NULL + && MEM_P (SET_DEST (d_pat))) + { + rtx src_mem = XEXP (SET_SRC (i_pat), 0); + /* This is a load after a store, there is no conflict if the load reads + from a cached area. Assume that loads from the stack, and from the + constant pool are cached, and that others will miss. This is a + hack. */ + + if ((GET_CODE (src_mem) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (src_mem)) + || reg_mentioned_p (stack_pointer_rtx, src_mem) + || reg_mentioned_p (frame_pointer_rtx, src_mem) + || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) + return 1; + } + + return cost; +} + +int +arm_max_conditional_execute (void) +{ + return max_insns_skipped; +} + +static int +arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) +{ + if (TARGET_32BIT) + return (TARGET_THUMB2 && !speed_p) ? 1 : 4; + else + return (optimize > 0) ? 2 : 0; +} + +static int +arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) +{ + return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); +} + +/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" + on Cortex-M4, where P varies from 1 to 3 according to some criteria), since + sequences of non-executed instructions in IT blocks probably take the same + amount of time as executed instructions (and the IT instruction itself takes + space in icache). This function was experimentally determined to give good + results on a popular embedded benchmark. */ + +static int +arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) +{ + return (TARGET_32BIT && speed_p) ? 1 + : arm_default_branch_cost (speed_p, predictable_p); +} + +static bool fp_consts_inited = false; + +static REAL_VALUE_TYPE value_fp0; + +static void +init_fp_table (void) +{ + REAL_VALUE_TYPE r; + + r = REAL_VALUE_ATOF ("0", DFmode); + value_fp0 = r; + fp_consts_inited = true; +} + +/* Return TRUE if rtx X is a valid immediate FP constant. */ +int +arm_const_double_rtx (rtx x) +{ + REAL_VALUE_TYPE r; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_MINUS_ZERO (r)) + return 0; + + if (REAL_VALUES_EQUAL (r, value_fp0)) + return 1; + + return 0; +} + +/* VFPv3 has a fairly wide range of representable immediates, formed from + "quarter-precision" floating-point values. These can be evaluated using this + formula (with ^ for exponentiation): + + -1^s * n * 2^-r + + Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that + 16 <= n <= 31 and 0 <= r <= 7. 
+ + These values are mapped onto an 8-bit integer ABCDEFGH s.t. + + - A (most-significant) is the sign bit. + - BCD are the exponent (encoded as r XOR 3). + - EFGH are the mantissa (encoded as n - 16). +*/ + +/* Return an integer index for a VFPv3 immediate operand X suitable for the + fconst[sd] instruction, or -1 if X isn't suitable. */ +static int +vfp3_const_double_index (rtx x) +{ + REAL_VALUE_TYPE r, m; + int sign, exponent; + unsigned HOST_WIDE_INT mantissa, mant_hi; + unsigned HOST_WIDE_INT mask; + HOST_WIDE_INT m1, m2; + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + + if (!TARGET_VFP3 || !CONST_DOUBLE_P (x)) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We can't represent these things, so detect them first. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) + return -1; + + /* Extract sign, exponent and mantissa. */ + sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; + r = real_value_abs (&r); + exponent = REAL_EXP (&r); + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 + bits for the mantissa, this may fail (low bits would be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&m1, &m2, m); + mantissa = m1; + mant_hi = m2; + + /* If there are bits set in the low part of the mantissa, we can't + represent this value. */ + if (mantissa != 0) + return -1; + + /* Now make it so that mantissa contains the most-significant bits, and move + the point_pos to indicate that the least-significant bits have been + discarded. */ + point_pos -= HOST_BITS_PER_WIDE_INT; + mantissa = mant_hi; + + /* We can permit four significant bits of mantissa only, plus a high bit + which is always 1. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return -1; + + /* Now we know the mantissa is in range, chop off the unneeded bits. */ + mantissa >>= point_pos - 5; + + /* The mantissa may be zero. Disallow that case. (It's possible to load the + floating-point immediate zero with Neon using an integer-zero load, but + that case is handled elsewhere.) */ + if (mantissa == 0) + return -1; + + gcc_assert (mantissa >= 16 && mantissa <= 31); + + /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where + normalized significands are in the range [1, 2). (Our mantissa is shifted + left 4 places at this point relative to normalized IEEE754 values). GCC + internally uses [0.5, 1) (see real.c), so the exponent returned from + REAL_EXP must be altered. */ + exponent = 5 - exponent; + + if (exponent < 0 || exponent > 7) + return -1; + + /* Sign, mantissa and exponent are now in the correct form to plug into the + formula described in the comment above. */ + return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); +} + +/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ +int +vfp3_const_double_rtx (rtx x) +{ + if (!TARGET_VFP3) + return 0; + + return vfp3_const_double_index (x) != -1; +} + +/* Recognize immediates which can be used in various Neon instructions. Legal + immediates are described by the following table (for VMVN variants, the + bitwise inverse of the constant shown is recognized. In either case, VMOV + is output and the correct instruction to use for a given constant is chosen + by the assembler). The constant shown is replicated across all elements of + the destination vector. 
+ + insn elems variant constant (binary) + ---- ----- ------- ----------------- + vmov i32 0 00000000 00000000 00000000 abcdefgh + vmov i32 1 00000000 00000000 abcdefgh 00000000 + vmov i32 2 00000000 abcdefgh 00000000 00000000 + vmov i32 3 abcdefgh 00000000 00000000 00000000 + vmov i16 4 00000000 abcdefgh + vmov i16 5 abcdefgh 00000000 + vmvn i32 6 00000000 00000000 00000000 abcdefgh + vmvn i32 7 00000000 00000000 abcdefgh 00000000 + vmvn i32 8 00000000 abcdefgh 00000000 00000000 + vmvn i32 9 abcdefgh 00000000 00000000 00000000 + vmvn i16 10 00000000 abcdefgh + vmvn i16 11 abcdefgh 00000000 + vmov i32 12 00000000 00000000 abcdefgh 11111111 + vmvn i32 13 00000000 00000000 abcdefgh 11111111 + vmov i32 14 00000000 abcdefgh 11111111 11111111 + vmvn i32 15 00000000 abcdefgh 11111111 11111111 + vmov i8 16 abcdefgh + vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd + eeeeeeee ffffffff gggggggg hhhhhhhh + vmov f32 18 aBbbbbbc defgh000 00000000 00000000 + vmov f32 19 00000000 00000000 00000000 00000000 + + For case 18, B = !b. Representable values are exactly those accepted by + vfp3_const_double_index, but are output as floating-point numbers rather + than indices. + + For case 19, we will change it to vmov.i32 when assembling. + + Variants 0-5 (inclusive) may also be used as immediates for the second + operand of VORR/VBIC instructions. + + The INVERSE argument causes the bitwise inverse of the given operand to be + recognized instead (used for recognizing legal immediates for the VAND/VORN + pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is + *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be + output, rather than the real insns vbic/vorr). + + INVERSE makes no difference to the recognition of float vectors. + + The return value is the variant of immediate as shown in the above table, or + -1 if the given value doesn't match any of the listed patterns. +*/ +static int +neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + break; \ + } + + unsigned int i, elsize = 0, idx = 0, n_elts; + unsigned int innersize; + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + bool vector = GET_CODE (op) == CONST_VECTOR; + + if (vector) + { + n_elts = CONST_VECTOR_NUNITS (op); + innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + } + else + { + n_elts = 1; + if (mode == VOIDmode) + mode = DImode; + innersize = GET_MODE_SIZE (mode); + } + + /* Vectors of float constants. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx el0 = CONST_VECTOR_ELT (op, 0); + REAL_VALUE_TYPE r0; + + if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); + + for (i = 1; i < n_elts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + REAL_VALUE_TYPE re; + + REAL_VALUE_FROM_CONST_DOUBLE (re, elt); + + if (!REAL_VALUES_EQUAL (r0, re)) + return -1; + } + + if (modconst) + *modconst = CONST_VECTOR_ELT (op, 0); + + if (elementwidth) + *elementwidth = 0; + + if (el0 == CONST0_RTX (GET_MODE (el0))) + return 19; + else + return 18; + } + + /* Splat vector constant out into a byte vector. */ + for (i = 0; i < n_elts; i++) + { + rtx el = vector ? 
CONST_VECTOR_ELT (op, i) : op; + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (CONST_INT_P (el)) + { + elpart = INTVAL (el); + parts = 1; + } + else if (CONST_DOUBLE_P (el)) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (CONST_DOUBLE_P (el)) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (1, 8, 16, bytes[i] == bytes[0]); + + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx]); + } + while (0); + + if (immtype == -1) + return -1; + + if (elementwidth) + *elementwidth = elsize; + + if (modconst) + { + unsigned HOST_WIDE_INT imm = 0; + + /* Un-invert bytes of recognized vector, if necessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + else + { + unsigned HOST_WIDE_INT imm = 0; + + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + } + + return immtype; +#undef CHECK +} + +/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, + VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for + float elements), and a modified constant (whatever should be output for a + VMOV) in *MODCONST. 
 */
+
+int
+neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+                               rtx *modconst, int *elementwidth)
+{
+  rtx tmpconst;
+  int tmpwidth;
+  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
+
+  if (retval == -1)
+    return 0;
+
+  if (modconst)
+    *modconst = tmpconst;
+
+  if (elementwidth)
+    *elementwidth = tmpwidth;
+
+  return 1;
+}
+
+/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
+   the immediate is valid, write a constant suitable for using as an operand
+   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
+   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
+
+int
+neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+                                rtx *modconst, int *elementwidth)
+{
+  rtx tmpconst;
+  int tmpwidth;
+  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
+
+  if (retval < 0 || retval > 5)
+    return 0;
+
+  if (modconst)
+    *modconst = tmpconst;
+
+  if (elementwidth)
+    *elementwidth = tmpwidth;
+
+  return 1;
+}
+
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
+   the immediate is valid, write a constant suitable for using as an operand
+   to VSHR/VSHL to *MODCONST and the corresponding element width to
+   *ELEMENTWIDTH.  ISLEFTSHIFT selects between a left and a right shift,
+   because they have different limitations.  */
+
+int
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+                                rtx *modconst, int *elementwidth,
+                                bool isleftshift)
+{
+  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+  unsigned HOST_WIDE_INT last_elt = 0;
+  unsigned HOST_WIDE_INT maxshift;
+
+  /* Split vector constant out into a byte vector.  */
+  for (i = 0; i < n_elts; i++)
+    {
+      rtx el = CONST_VECTOR_ELT (op, i);
+      unsigned HOST_WIDE_INT elpart;
+
+      if (CONST_INT_P (el))
+        elpart = INTVAL (el);
+      else if (CONST_DOUBLE_P (el))
+        return 0;
+      else
+        gcc_unreachable ();
+
+      if (i != 0 && elpart != last_elt)
+        return 0;
+
+      last_elt = elpart;
+    }
+
+  /* Shift less than element size.  */
+  maxshift = innersize * 8;
+
+  if (isleftshift)
+    {
+      /* Left shift immediate value can be from 0 to <size> - 1.  */
+      if (last_elt >= maxshift)
+        return 0;
+    }
+  else
+    {
+      /* Right shift immediate value can be from 1 to <size>.  */
+      if (last_elt == 0 || last_elt > maxshift)
+        return 0;
+    }
+
+  if (elementwidth)
+    *elementwidth = innersize * 8;
+
+  if (modconst)
+    *modconst = CONST_VECTOR_ELT (op, 0);
+
+  return 1;
+}
+
+/* Return a string suitable for output of Neon immediate logic operation
+   MNEM.  */
+
+char *
+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+                             int inverse, int quad)
+{
+  int width, is_valid;
+  static char templ[40];
+
+  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
+
+  gcc_assert (is_valid != 0);
+
+  if (quad)
+    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
+  else
+    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
+
+  return templ;
+}
+
+/* Return a string suitable for output of Neon immediate shift operation
+   (VSHR or VSHL) MNEM.
*/ + +char * +neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, + enum machine_mode mode, int quad, + bool isleftshift) +{ + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); + gcc_assert (is_valid != 0); + + if (quad) + sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); + else + sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); + + return templ; +} + +/* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + extra calculations to do the full operation in fewer steps, I don't think. + Although all vector elements of the result but the first are ignored, we + actually calculate the same result in each of the elements. An alternative + such as initially loading a vector with zero to use as each of the second + operands would use up an additional register and take an extra instruction, + for no particular gain. */ + +void +neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, + rtx (*reduc) (rtx, rtx, rtx)) +{ + enum machine_mode inner = GET_MODE_INNER (mode); + unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); + rtx tmpsum = op1; + + for (i = parts / 2; i >= 1; i /= 2) + { + rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); + emit_insn (reduc (dest, tmpsum, tmpsum)); + tmpsum = dest; + } +} + +/* If VALS is a vector constant that can be loaded into a register + using VDUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ + +static rtx +neon_vdup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) + return NULL_RTX; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (!all_same) + /* The elements are not all the same. We could handle repeating + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t + {0, C, 0, C, 0, C, 0, C} which can be loaded using + vdup.i16). */ + return NULL_RTX; + + /* We can load this constant by using VDUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ + +rtx +neon_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx target; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. 
*/ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) + /* Load using VMOV. On Cortex-A8 this takes one cycle. */ + return const_vec; + else if ((target = neon_vdup_constant (vals)) != NULL_RTX) + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON + pipeline cycle; creating the constant takes one or two ARM + pipeline cycles. */ + return target; + else if (const_vec != NULL_RTX) + /* Load from constant pool. On Cortex-A8 this takes two cycles + (for either double or quad vectors). We can not take advantage + of single-cycle VLD1 because we need a PC-relative addressing + mode. */ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +/* Initialize vector TARGET to VALS. */ + +void +neon_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONSTANT_P (x)) + ++n_var, one_var = i; + + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = neon_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_VEC_DUPLICATE (mode, x))); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); + neon_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + switch (mode) + { + case V8QImode: + emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); + break; + case V16QImode: + emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); + break; + case V4HImode: + emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); + break; + case V8HImode: + emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); + break; + case V2SImode: + emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); + break; + case V4SImode: + emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); + break; + case V2SFmode: + emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); + break; + case V4SFmode: + emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); + break; + case V2DImode: + emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); + break; + default: + gcc_unreachable (); + } + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. 
*/ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); +} + +/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise + ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so + reported source locations are bogus. */ + +static void +bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + const char *err) +{ + HOST_WIDE_INT lane; + + gcc_assert (CONST_INT_P (operand)); + + lane = INTVAL (operand); + + if (lane < low || lane >= high) + error (err); +} + +/* Bounds-check lanes. */ + +void +neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "lane out of range"); +} + +/* Bounds-check constants. */ + +void +neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "constant out of range"); +} + +HOST_WIDE_INT +neon_element_bits (enum machine_mode mode) +{ + if (mode == DImode) + return GET_MODE_BITSIZE (mode); + else + return GET_MODE_BITSIZE (GET_MODE_INNER (mode)); +} + + +/* Predicates for `match_operand' and `match_operator'. */ + +/* Return TRUE if OP is a valid coprocessor memory address pattern. + WB is true if full writeback address modes are allowed and is false + if limited writeback address modes (POST_INC and PRE_DEC) are + allowed. */ + +int +arm_coproc_mem_operand (rtx op, bool wb) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed || lra_in_progress) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* Autoincremment addressing modes. POST_INC and PRE_DEC are + acceptable in any case (subject to verification by + arm_address_register_rtx_p). We need WB to be true to accept + PRE_INC and POST_DEC. */ + if (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC))) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + if (wb + && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (ind, 0), 0) + && GET_CODE (XEXP (ind, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) + ind = XEXP (ind, 1); + + /* Match: + (plus (reg) + (const)). */ + if (GET_CODE (ind) == PLUS + && REG_P (XEXP (ind, 0)) + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && CONST_INT_P (XEXP (ind, 1)) + && INTVAL (XEXP (ind, 1)) > -1024 + && INTVAL (XEXP (ind, 1)) < 1024 + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a memory operand which we can load or store a vector + to/from. 
TYPE is one of the following values: + 0 - Vector load/stor (vldr) + 1 - Core registers (ldm) + 2 - Element/structure loads (vld1) + */ +int +neon_vector_mem_operand (rtx op, int type, bool strict) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return !strict; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* Allow post-increment with Neon registers. */ + if ((type != 1 && GET_CODE (ind) == POST_INC) + || (type == 0 && GET_CODE (ind) == PRE_DEC)) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + /* FIXME: vld1 allows register post-modify. */ + + /* Match: + (plus (reg) + (const)). */ + if (type == 0 + && GET_CODE (ind) == PLUS + && REG_P (XEXP (ind, 0)) + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && CONST_INT_P (XEXP (ind, 1)) + && INTVAL (XEXP (ind, 1)) > -1024 + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We have no such constraint + on double mode offsets. (This must match arm_legitimate_index_p.) */ + && (INTVAL (XEXP (ind, 1)) + < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024)) + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct + type. */ +int +neon_struct_mem_operand (rtx op) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ + if (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + return FALSE; +} + +/* Return true if X is a register that will be eliminated later on. */ +int +arm_eliminable_register (rtx x) +{ + return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM + || REGNO (x) == ARG_POINTER_REGNUM + || (REGNO (x) >= FIRST_VIRTUAL_REGISTER + && REGNO (x) <= LAST_VIRTUAL_REGISTER)); +} + +/* Return GENERAL_REGS if a scratch register required to reload x to/from + coprocessor registers. 
Otherwise return NO_REGS. */ + +enum reg_class +coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) +{ + if (mode == HFmode) + { + if (!TARGET_NEON_FP16) + return GENERAL_REGS; + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) + return NO_REGS; + return GENERAL_REGS; + } + + /* The neon move patterns handle all legitimate vector and struct + addresses. */ + if (TARGET_NEON + && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || VALID_NEON_STRUCT_MODE (mode))) + return NO_REGS; + + if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) + return NO_REGS; + + return GENERAL_REGS; +} + +/* Values which must be returned in the most-significant end of the return + register. */ + +static bool +arm_return_in_msb (const_tree valtype) +{ + return (TARGET_AAPCS_BASED + && BYTES_BIG_ENDIAN + && (AGGREGATE_TYPE_P (valtype) + || TREE_CODE (valtype) == COMPLEX_TYPE + || FIXED_POINT_TYPE_P (valtype))); +} + +/* Return TRUE if X references a SYMBOL_REF. */ +int +symbol_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == SYMBOL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they + are constant offsets, not symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (symbol_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +/* Return TRUE if X references a LABEL_REF. */ +int +label_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == LABEL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing + instruction, but they are constant offsets, not symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (label_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +int +tls_mentioned_p (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + return tls_mentioned_p (XEXP (x, 0)); + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_TLS) + return 1; + + default: + return 0; + } +} + +/* Must not copy any rtx that uses a pc-relative address. */ + +static int +arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == UNSPEC + && (XINT (*x, 1) == UNSPEC_PIC_BASE + || XINT (*x, 1) == UNSPEC_PIC_UNIFIED)) + return 1; + return 0; +} + +static bool +arm_cannot_copy_insn_p (rtx insn) +{ + /* The tls call insn cannot be copied, as it is paired with a data + word. */ + if (recog_memoized (insn) == CODE_FOR_tlscall) + return true; + + return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); +} + +enum rtx_code +minmax_code (rtx x) +{ + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case SMAX: + return GE; + case SMIN: + return LE; + case UMIN: + return LEU; + case UMAX: + return GEU; + default: + gcc_unreachable (); + } +} + +/* Match pair of min/max operators that can be implemented via usat/ssat. 
 */
+
+bool
+arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
+                        int *mask, bool *signed_sat)
+{
+  /* The high bound must be a power of two minus one.  */
+  int log = exact_log2 (INTVAL (hi_bound) + 1);
+  if (log == -1)
+    return false;
+
+  /* The low bound is either zero (for usat) or one less than the
+     negation of the high bound (for ssat).  */
+  if (INTVAL (lo_bound) == 0)
+    {
+      if (mask)
+        *mask = log;
+      if (signed_sat)
+        *signed_sat = false;
+
+      return true;
+    }
+
+  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
+    {
+      if (mask)
+        *mask = log + 1;
+      if (signed_sat)
+        *signed_sat = true;
+
+      return true;
+    }
+
+  return false;
+}
+
+/* Return 1 if memory locations are adjacent.  */
+int
+adjacent_mem_locations (rtx a, rtx b)
+{
+  /* We don't guarantee to preserve the order of these memory refs.  */
+  if (volatile_refs_p (a) || volatile_refs_p (b))
+    return 0;
+
+  if ((REG_P (XEXP (a, 0))
+       || (GET_CODE (XEXP (a, 0)) == PLUS
+           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
+      && (REG_P (XEXP (b, 0))
+          || (GET_CODE (XEXP (b, 0)) == PLUS
+              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
+    {
+      HOST_WIDE_INT val0 = 0, val1 = 0;
+      rtx reg0, reg1;
+      int val_diff;
+
+      if (GET_CODE (XEXP (a, 0)) == PLUS)
+        {
+          reg0 = XEXP (XEXP (a, 0), 0);
+          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+        }
+      else
+        reg0 = XEXP (a, 0);
+
+      if (GET_CODE (XEXP (b, 0)) == PLUS)
+        {
+          reg1 = XEXP (XEXP (b, 0), 0);
+          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+        }
+      else
+        reg1 = XEXP (b, 0);
+
+      /* Don't accept any offset that will require multiple
+         instructions to handle, since this would cause the
+         arith_adjacentmem pattern to output an overlong sequence.  */
+      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
+        return 0;
+
+      /* Don't allow an eliminable register: register elimination can make
+         the offset too large.  */
+      if (arm_eliminable_register (reg0))
+        return 0;
+
+      val_diff = val1 - val0;
+
+      if (arm_ld_sched)
+        {
+          /* If the target has load delay slots, then there's no benefit
+             to using an ldm instruction unless the offset is zero and
+             we are optimizing for size.  */
+          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+                  && (val_diff == 4 || val_diff == -4));
+        }
+
+      return ((REGNO (reg0) == REGNO (reg1))
+              && (val_diff == 4 || val_diff == -4));
+    }
+
+  return 0;
+}
+
+/* Return true if OP is a valid load or store multiple operation.  LOAD is true
+   for load operations, false for store operations.  CONSECUTIVE is true
+   if the register numbers in the operation must be consecutive in the register
+   bank.  RETURN_PC is true if value is to be loaded in PC.
+   The pattern we are trying to match for load is:
+     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
+     ]
+     where
+       1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+       2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+       3.  If consecutive is TRUE, then for kth register being loaded,
+           REGNO (R_dk) = REGNO (R_d0) + k.
+   The pattern for store is similar.
*/ +bool +ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode, + bool consecutive, bool return_pc) +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + rtx reg, mem, addr; + unsigned regno; + unsigned first_regno; + HOST_WIDE_INT i = 1, base = 0, offset = 0; + rtx elt; + bool addr_reg_in_reglist = false; + bool update = false; + int reg_increment; + int offset_adj; + int regs_per_val; + + /* If not in SImode, then registers must be consecutive + (e.g., VLDM instructions for DFmode). */ + gcc_assert ((mode == SImode) || consecutive); + /* Setting return_pc for stores is illegal. */ + gcc_assert (!return_pc || load); + + /* Set up the increments and the regs per val based on the mode. */ + reg_increment = GET_MODE_SIZE (mode); + regs_per_val = reg_increment / 4; + offset_adj = return_pc ? 1 : 0; + + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET + || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))) + return false; + + /* Check if this is a write-back. */ + elt = XVECEXP (op, 0, offset_adj); + if (GET_CODE (SET_SRC (elt)) == PLUS) + { + i++; + base = 1; + update = true; + + /* The offset adjustment must be the number of registers being + popped times the size of a single register. */ + if (!REG_P (SET_DEST (elt)) + || !REG_P (XEXP (SET_SRC (elt), 0)) + || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0))) + || !CONST_INT_P (XEXP (SET_SRC (elt), 1)) + || INTVAL (XEXP (SET_SRC (elt), 1)) != + ((count - 1 - offset_adj) * reg_increment)) + return false; + } + + i = i + offset_adj; + base = base + offset_adj; + /* Perform a quick check so we don't blow up below. If only one reg is loaded, + success depends on the type: VLDM can do just one reg, + LDM must do at least two. */ + if ((count <= i) && (mode == SImode)) + return false; + + elt = XVECEXP (op, 0, i - 1); + if (GET_CODE (elt) != SET) + return false; + + if (load) + { + reg = SET_DEST (elt); + mem = SET_SRC (elt); + } + else + { + reg = SET_SRC (elt); + mem = SET_DEST (elt); + } + + if (!REG_P (reg) || !MEM_P (mem)) + return false; + + regno = REGNO (reg); + first_regno = regno; + addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS) + { + if (!CONST_INT_P (XEXP (addr, 1))) + return false; + + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (!REG_P (addr)) + return false; + + /* Don't allow SP to be loaded unless it is also the base register. It + guarantees that SP is reset correctly when an LDM instruction + is interrupted. Otherwise, we might end up with a corrupt stack. */ + if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) + return false; + + for (; i < count; i++) + { + elt = XVECEXP (op, 0, i); + if (GET_CODE (elt) != SET) + return false; + + if (load) + { + reg = SET_DEST (elt); + mem = SET_SRC (elt); + } + else + { + reg = SET_SRC (elt); + mem = SET_DEST (elt); + } + + if (!REG_P (reg) + || GET_MODE (reg) != mode + || REGNO (reg) <= regno + || (consecutive + && (REGNO (reg) != + (unsigned int) (first_regno + regs_per_val * (i - base)))) + /* Don't allow SP to be loaded unless it is also the base register. It + guarantees that SP is reset correctly when an LDM instruction + is interrupted. Otherwise, we might end up with a corrupt stack. 
*/ + || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) + || !MEM_P (mem) + || GET_MODE (mem) != mode + || ((GET_CODE (XEXP (mem, 0)) != PLUS + || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr) + || !CONST_INT_P (XEXP (XEXP (mem, 0), 1)) + || (INTVAL (XEXP (XEXP (mem, 0), 1)) != + offset + (i - base) * reg_increment)) + && (!REG_P (XEXP (mem, 0)) + || offset + (i - base) * reg_increment != 0))) + return false; + + regno = REGNO (reg); + if (regno == REGNO (addr)) + addr_reg_in_reglist = true; + } + + if (load) + { + if (update && addr_reg_in_reglist) + return false; + + /* For Thumb-1, address register is always modified - either by write-back + or by explicit load. If the pattern does not describe an update, + then the address register must be in the list of loaded registers. */ + if (TARGET_THUMB1) + return update || addr_reg_in_reglist; + } + + return true; +} + +/* Return true iff it would be profitable to turn a sequence of NOPS loads + or stores (depending on IS_STORE) into a load-multiple or store-multiple + instruction. ADD_OFFSET is nonzero if the base address register needs + to be modified with an add instruction before we can use it. */ + +static bool +multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED, + int nops, HOST_WIDE_INT add_offset) + { + /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm + if the offset isn't small enough. The reason 2 ldrs are faster + is because these ARMs are able to do more than one cache access + in a single cycle. The ARM9 and StrongARM have Harvard caches, + whilst the ARM8 has a double bandwidth cache. This means that + these cores can do both an instruction fetch and a data fetch in + a single cycle, so the trick of calculating the address into a + scratch register (one of the result regs) and then doing a load + multiple actually becomes slower (and no smaller in code size). + That is the transformation + + ldr rd1, [rbase + offset] + ldr rd2, [rbase + offset + 4] + + to + + add rd1, rbase, offset + ldmia rd1, {rd1, rd2} + + produces worse code -- '3 cycles + any stalls on rd2' instead of + '2 cycles + any stalls on rd2'. On ARMs with only one cache + access per cycle, the first sequence could never complete in less + than 6 cycles, whereas the ldm sequence would only take 5 and + would make better use of sequential accesses if not hitting the + cache. + + We cheat here and test 'arm_ld_sched' which we currently know to + only be true for the ARM8, ARM9 and StrongARM. If this ever + changes, then the test below needs to be reworked. */ + if (nops == 2 && arm_ld_sched && add_offset != 0) + return false; + + /* XScale has load-store double instructions, but they have stricter + alignment requirements than load-store multiple, so we cannot + use them. + + For XScale ldm requires 2 + NREGS cycles to complete and blocks + the pipeline until completion. + + NREGS CYCLES + 1 3 + 2 4 + 3 5 + 4 6 + + An ldr instruction takes 1-3 cycles, but does not block the + pipeline. + + NREGS CYCLES + 1 1-3 + 2 2-6 + 3 3-9 + 4 4-12 + + Best case ldr will always win. However, the more ldr instructions + we issue, the less likely we are to be able to schedule them well. + Using ldr instructions also increases code size. + + As a compromise, we use ldr for counts of 1 or 2 regs, and ldm + for counts of 3 or 4 regs. */ + if (nops <= 2 && arm_tune_xscale && !optimize_size) + return false; + return true; +} + +/* Subroutine of load_multiple_sequence and store_multiple_sequence. 
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute + an array ORDER which describes the sequence to use when accessing the + offsets that produces an ascending order. In this sequence, each + offset must be larger by exactly 4 than the previous one. ORDER[0] + must have been filled in with the lowest offset by the caller. + If UNSORTED_REGS is nonnull, it is an array of register numbers that + we use to verify that ORDER produces an ascending order of registers. + Return true if it was possible to construct such an order, false if + not. */ + +static bool +compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order, + int *unsorted_regs) +{ + int i; + for (i = 1; i < nops; i++) + { + int j; + + order[i] = order[i - 1]; + for (j = 0; j < nops; j++) + if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4) + { + /* We must find exactly one offset that is higher than the + previous one by 4. */ + if (order[i] != order[i - 1]) + return false; + order[i] = j; + } + if (order[i] == order[i - 1]) + return false; + /* The register numbers must be ascending. */ + if (unsorted_regs != NULL + && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]]) + return false; + } + return true; +} + +/* Used to determine in a peephole whether a sequence of load + instructions can be changed into a load-multiple instruction. + NOPS is the number of separate load instructions we are examining. The + first NOPS entries in OPERANDS are the destination registers, the + next NOPS entries are memory operands. If this function is + successful, *BASE is set to the common base register of the memory + accesses; *LOAD_OFFSET is set to the first memory location's offset + from that base register. + REGS is an array filled in with the destination register numbers. + SAVED_ORDER (if nonnull), is an array filled in with an order that maps + insn numbers to an ascending order of stores. If CHECK_REGS is true, + the sequence of registers in REGS matches the loads from ascending memory + locations, and the function verifies that the register numbers are + themselves ascending. If CHECK_REGS is false, the register numbers + are stored in the order they are found in the operands. */ +static int +load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, + int *base, HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + rtx base_reg_rtx = NULL; + int base_reg = -1; + int i, ldm_case; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i), true); + + gcc_assert (MEM_P (operands[nops + i])); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. 
*/ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((REG_P (reg = XEXP (operands[nops + i], 0)) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + && (CONST_INT_P (offset + = XEXP (XEXP (operands[nops + i], 0), 1))))) + { + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + unsorted_regs[i] = (REG_P (operands[i]) + ? REGNO (operands[i]) + : REGNO (SUBREG_REG (operands[i]))); + + /* If it isn't an integer register, or if it overwrites the + base register but isn't the last insn in the list, then + we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + || unsorted_regs[i] > 14 + || (i != nops - 1 && unsorted_regs[i] == base_reg)) + return 0; + + /* Don't allow SP to be loaded unless it is also the base + register. It guarantees that SP is reset correctly when + an LDM instruction is interrupted. Otherwise, we might + end up with a corrupt stack. */ + if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. */ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + ldm_case = 1; /* ldmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + ldm_case = 2; /* ldmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + ldm_case = 3; /* ldmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + ldm_case = 4; /* ldmdb */ + else if (const_ok_for_arm (unsorted_offsets[order[0]]) + || const_ok_for_arm (-unsorted_offsets[order[0]])) + ldm_case = 5; + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, + ldm_case == 5 + ? unsorted_offsets[order[0]] : 0)) + return 0; + + return ldm_case; +} + +/* Used to determine in a peephole whether a sequence of store instructions can + be changed into a store-multiple instruction. + NOPS is the number of separate store instructions we are examining. + NOPS_TOTAL is the total number of instructions recognized by the peephole + pattern. + The first NOPS entries in OPERANDS are the source registers, the next + NOPS entries are memory operands. 
If this function is successful, *BASE is + set to the common base register of the memory accesses; *LOAD_OFFSET is set + to the first memory location's offset from that base register. REGS is an + array filled in with the source register numbers, REG_RTXS (if nonnull) is + likewise filled with the corresponding rtx's. + SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn + numbers to an ascending order of stores. + If CHECK_REGS is true, the sequence of registers in *REGS matches the stores + from ascending memory locations, and the function verifies that the register + numbers are themselves ascending. If CHECK_REGS is false, the register + numbers are stored in the order they are found in the operands. */ +static int +store_multiple_sequence (rtx *operands, int nops, int nops_total, + int *regs, rtx *reg_rtxs, int *saved_order, int *base, + HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + int base_reg = -1; + rtx base_reg_rtx = NULL; + int i, stm_case; + + /* Write back of base register is currently only supported for Thumb 1. */ + int base_writeback = TARGET_THUMB1; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i), true); + + gcc_assert (MEM_P (operands[nops + i])); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. */ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((REG_P (reg = XEXP (operands[nops + i], 0)) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + && (CONST_INT_P (offset + = XEXP (XEXP (operands[nops + i], 0), 1))))) + { + unsorted_reg_rtxs[i] = (REG_P (operands[i]) + ? operands[i] : SUBREG_REG (operands[i])); + unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]); + + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + /* If it isn't an integer register, then we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + /* The effects are unpredictable if the base register is + both updated and stored. */ + || (base_writeback && unsorted_regs[i] == base_reg) + || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) + || unsorted_regs[i] > 14) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. 
*/ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + { + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + if (reg_rtxs) + reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i]; + } + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops_total, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + stm_case = 1; /* stmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + stm_case = 2; /* stmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + stm_case = 3; /* stmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + stm_case = 4; /* stmdb */ + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, 0)) + return 0; + + return stm_case; +} + +/* Routines for use in generating RTL. */ + +/* Generate a load-multiple instruction. COUNT is the number of loads in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (Pmode, basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]); + + return result; +} + +/* Generate a store-multiple instruction. COUNT is the number of stores in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i])); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 
1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (Pmode, basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j])); + + return result; +} + +/* Generate either a load-multiple or a store-multiple instruction. This + function can be used in situations where we can start with a single MEM + rtx and adjust its address upwards. + COUNT is the number of operations in the instruction, not counting a + possible update of the base register. REGS is an array containing the + register operands. + BASEREG is the base register to be used in addressing the memory operands, + which are constructed from BASEMEM. + WRITE_BACK specifies whether the generated instruction should include an + update of the base register. + OFFSETP is used to pass an offset to and from this function; this offset + is not used when constructing the address (instead BASEMEM should have an + appropriate offset in its address), it is used only for setting + MEM_OFFSET. It is updated only if WRITE_BACK is true.*/ + +static rtx +arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg, + bool write_back, rtx basemem, HOST_WIDE_INT *offsetp) +{ + rtx mems[MAX_LDM_STM_OPS]; + HOST_WIDE_INT offset = *offsetp; + int i; + + gcc_assert (count <= MAX_LDM_STM_OPS); + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + for (i = 0; i < count; i++) + { + rtx addr = plus_constant (Pmode, basereg, i * 4); + mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset); + offset += 4; + } + + if (write_back) + *offsetp = offset; + + if (is_load) + return arm_gen_load_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); + else + return arm_gen_store_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); +} + +rtx +arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem, + offsetp); +} + +rtx +arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem, + offsetp); +} + +/* Called from a peephole2 expander to turn a sequence of loads into an + LDM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate loads we are trying to combine. SORT_REGS + is true if we can reorder the registers because they are used commutatively + subsequently. + Returns true iff we could generate a new instruction. 
*/ + +bool +gen_ldm_seq (rtx *operands, int nops, bool sort_regs) +{ + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int i, j, base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int ldm_case; + rtx addr; + + ldm_case = load_multiple_sequence (operands, nops, regs, mem_order, + &base_reg, &offset, !sort_regs); + + if (ldm_case == 0) + return false; + + if (sort_regs) + for (i = 0; i < nops - 1; i++) + for (j = i + 1; j < nops; j++) + if (regs[i] > regs[j]) + { + int t = regs[i]; + regs[i] = regs[j]; + regs[j] = t; + } + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + if (TARGET_THUMB1) + { + gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx)); + gcc_assert (ldm_case == 1 || ldm_case == 5); + write_back = TRUE; + } + + if (ldm_case == 5) + { + rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]); + emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset))); + offset = 0; + if (!TARGET_THUMB1) + { + base_reg = regs[0]; + base_reg_rtx = newbase; + } + } + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores into an + STM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate stores we are trying to combine. + Returns true iff we could generate a new instruction. */ + +bool +gen_stm_seq (rtx *operands, int nops) +{ + int i; + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + + stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL, + mem_order, &base_reg, &offset, true); + + if (stm_case == 0) + return false; + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (Pmode, base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores that are + preceded by constant loads into an STM instruction. OPERANDS are the + operands found by the peephole matcher; NOPS indicates how many + separate stores we are trying to combine; there are 2 * NOPS + instructions in the peephole. + Returns true iff we could generate a new instruction. 
*/ + +bool +gen_const_stm_seq (rtx *operands, int nops) +{ + int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS]; + int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + int i, j; + HARD_REG_SET allocated; + + stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs, + mem_order, &base_reg, &offset, false); + + if (stm_case == 0) + return false; + + memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs); + + /* If the same register is used more than once, try to find a free + register. */ + CLEAR_HARD_REG_SET (allocated); + for (i = 0; i < nops; i++) + { + for (j = i + 1; j < nops; j++) + if (regs[i] == regs[j]) + { + rtx t = peep2_find_free_register (0, nops * 2, + TARGET_THUMB1 ? "l" : "r", + SImode, &allocated); + if (t == NULL_RTX) + return false; + reg_rtxs[i] = t; + regs[i] = REGNO (t); + } + } + + /* Compute an ordering that maps the register numbers to an ascending + sequence. */ + reg_order[0] = 0; + for (i = 0; i < nops; i++) + if (regs[i] < regs[reg_order[0]]) + reg_order[0] = i; + + for (i = 1; i < nops; i++) + { + int this_order = reg_order[i - 1]; + for (j = 0; j < nops; j++) + if (regs[j] > regs[reg_order[i - 1]] + && (this_order == reg_order[i - 1] + || regs[j] < regs[this_order])) + this_order = j; + reg_order[i] = this_order; + } + + /* Ensure that registers that must be live after the instruction end + up with the correct value. */ + for (i = 0; i < nops; i++) + { + int this_order = reg_order[i]; + if ((this_order != mem_order[i] + || orig_reg_rtxs[this_order] != reg_rtxs[this_order]) + && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order])) + return false; + } + + /* Load the constants. */ + for (i = 0; i < nops; i++) + { + rtx op = operands[2 * nops + mem_order[i]]; + sorted_regs[i] = regs[reg_order[i]]; + emit_move_insn (reg_rtxs[reg_order[i]], op); + } + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (Pmode, base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit + unaligned copies on processors which support unaligned semantics for those + instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency + (using more registers) by doing e.g. load/load/store/store for a factor of 2. + An interleave factor of 1 (the minimum) will perform no interleaving. + Load/store multiple are used for aligned addresses where possible. 
*/ + +static void +arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, + HOST_WIDE_INT length, + unsigned int interleave_factor) +{ + rtx *regs = XALLOCAVEC (rtx, interleave_factor); + int *regnos = XALLOCAVEC (int, interleave_factor); + HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; + HOST_WIDE_INT i, j; + HOST_WIDE_INT remaining = length, words; + rtx halfword_tmp = NULL, byte_tmp = NULL; + rtx dst, src; + bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; + bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; + HOST_WIDE_INT srcoffset, dstoffset; + HOST_WIDE_INT src_autoinc, dst_autoinc; + rtx mem, addr; + + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); + + /* Use hard registers if we have aligned source or destination so we can use + load/store multiple with contiguous registers. */ + if (dst_aligned || src_aligned) + for (i = 0; i < interleave_factor; i++) + regs[i] = gen_rtx_REG (SImode, i); + else + for (i = 0; i < interleave_factor; i++) + regs[i] = gen_reg_rtx (SImode); + + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + src = copy_addr_to_reg (XEXP (srcbase, 0)); + + srcoffset = dstoffset = 0; + + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. + For copying the last bytes we want to subtract this offset again. */ + src_autoinc = dst_autoinc = 0; + + for (i = 0; i < interleave_factor; i++) + regnos[i] = i; + + /* Copy BLOCK_SIZE_BYTES chunks. */ + + for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) + { + /* Load words. */ + if (src_aligned && interleave_factor > 1) + { + emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, + TRUE, srcbase, &srcoffset)); + src_autoinc += UNITS_PER_WORD * interleave_factor; + } + else + { + for (j = 0; j < interleave_factor; j++) + { + addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD + - src_autoinc)); + mem = adjust_automodify_address (srcbase, SImode, addr, + srcoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_loadsi (regs[j], mem)); + } + srcoffset += block_size_bytes; + } + + /* Store words. */ + if (dst_aligned && interleave_factor > 1) + { + emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, + TRUE, dstbase, &dstoffset)); + dst_autoinc += UNITS_PER_WORD * interleave_factor; + } + else + { + for (j = 0; j < interleave_factor; j++) + { + addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD + - dst_autoinc)); + mem = adjust_automodify_address (dstbase, SImode, addr, + dstoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_storesi (mem, regs[j])); + } + dstoffset += block_size_bytes; + } + + remaining -= block_size_bytes; + } + + /* Copy any whole words left (note these aren't interleaved with any + subsequent halfword/byte load/stores in the interests of simplicity). 
*/ + + words = remaining / UNITS_PER_WORD; + + gcc_assert (words < interleave_factor); + + if (src_aligned && words > 1) + { + emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, + &srcoffset)); + src_autoinc += UNITS_PER_WORD * words; + } + else + { + for (j = 0; j < words; j++) + { + addr = plus_constant (Pmode, src, + srcoffset + j * UNITS_PER_WORD - src_autoinc); + mem = adjust_automodify_address (srcbase, SImode, addr, + srcoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_loadsi (regs[j], mem)); + } + srcoffset += words * UNITS_PER_WORD; + } + + if (dst_aligned && words > 1) + { + emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, + &dstoffset)); + dst_autoinc += words * UNITS_PER_WORD; + } + else + { + for (j = 0; j < words; j++) + { + addr = plus_constant (Pmode, dst, + dstoffset + j * UNITS_PER_WORD - dst_autoinc); + mem = adjust_automodify_address (dstbase, SImode, addr, + dstoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_storesi (mem, regs[j])); + } + dstoffset += words * UNITS_PER_WORD; + } + + remaining -= words * UNITS_PER_WORD; + + gcc_assert (remaining < 4); + + /* Copy a halfword if necessary. */ + + if (remaining >= 2) + { + halfword_tmp = gen_reg_rtx (SImode); + + addr = plus_constant (Pmode, src, srcoffset - src_autoinc); + mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); + emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); + + /* Either write out immediately, or delay until we've loaded the last + byte, depending on interleave factor. */ + if (interleave_factor == 1) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); + emit_insn (gen_unaligned_storehi (mem, + gen_lowpart (HImode, halfword_tmp))); + halfword_tmp = NULL; + dstoffset += 2; + } + + remaining -= 2; + srcoffset += 2; + } + + gcc_assert (remaining < 2); + + /* Copy last byte. */ + + if ((remaining & 1) != 0) + { + byte_tmp = gen_reg_rtx (SImode); + + addr = plus_constant (Pmode, src, srcoffset - src_autoinc); + mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); + emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); + + if (interleave_factor == 1) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); + byte_tmp = NULL; + dstoffset++; + } + + remaining--; + srcoffset++; + } + + /* Store last halfword if we haven't done so already. */ + + if (halfword_tmp) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); + emit_insn (gen_unaligned_storehi (mem, + gen_lowpart (HImode, halfword_tmp))); + dstoffset += 2; + } + + /* Likewise for last byte. */ + + if (byte_tmp) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); + dstoffset++; + } + + gcc_assert (remaining == 0 && srcoffset == dstoffset); +} + +/* From mips_adjust_block_mem: + + Helper function for doing a loop-based block operation on memory + reference MEM. Each iteration of the loop will operate on LENGTH + bytes of MEM. + + Create a new base register for use within the loop and point it to + the start of MEM. Create a new memory reference that uses this + register. Store them in *LOOP_REG and *LOOP_MEM respectively. 
*/ + +static void +arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, + rtx *loop_mem) +{ + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + + /* Although the new mem does not refer to a known location, + it does keep up to LENGTH bytes of alignment. */ + *loop_mem = change_address (mem, BLKmode, *loop_reg); + set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); +} + +/* From mips_block_move_loop: + + Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that + the memory regions do not overlap. */ + +static void +arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, + unsigned int interleave_factor, + HOST_WIDE_INT bytes_per_iter) +{ + rtx label, src_reg, dest_reg, final_src, test; + HOST_WIDE_INT leftover; + + leftover = length % bytes_per_iter; + length -= leftover; + + /* Create registers and memory references for use within the loop. */ + arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); + arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); + + /* Calculate the value that SRC_REG should have after the last iteration of + the loop. */ + final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), + 0, 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); + emit_label (label); + + /* Emit the loop body. */ + arm_block_move_unaligned_straight (dest, src, bytes_per_iter, + interleave_factor); + + /* Move on to the next block. */ + emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); + emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. */ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); + emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); + + /* Mop up any left-over bytes. */ + if (leftover) + arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); +} + +/* Emit a block move when either the source or destination is unaligned (not + aligned to a four-byte boundary). This may need further tuning depending on + core type, optimize_size setting, etc. */ + +static int +arm_movmemqi_unaligned (rtx *operands) +{ + HOST_WIDE_INT length = INTVAL (operands[2]); + + if (optimize_size) + { + bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; + bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; + /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit + size of code if optimizing for size. We'll use ldm/stm if src_aligned + or dst_aligned though: allow more interleaving in those cases since the + resulting code can be smaller. */ + unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; + HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; + + if (length > 12) + arm_block_move_unaligned_loop (operands[0], operands[1], length, + interleave_factor, bytes_per_iter); + else + arm_block_move_unaligned_straight (operands[0], operands[1], length, + interleave_factor); + } + else + { + /* Note that the loop created by arm_block_move_unaligned_loop may be + subject to loop unrolling, which makes tuning this condition a little + redundant. 
*/ + if (length > 32) + arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); + else + arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); + } + + return 1; +} + +int +arm_gen_movmemqi (rtx *operands) +{ + HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; + HOST_WIDE_INT srcoffset, dstoffset; + int i; + rtx src, dst, srcbase, dstbase; + rtx part_bytes_reg = NULL; + rtx mem; + + if (!CONST_INT_P (operands[2]) + || !CONST_INT_P (operands[3]) + || INTVAL (operands[2]) > 64) + return 0; + + if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) + return arm_movmemqi_unaligned (operands); + + if (INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + srcbase = operands[1]; + + dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0)); + src = copy_to_mode_reg (SImode, XEXP (srcbase, 0)); + + in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2])); + out_words_to_go = INTVAL (operands[2]) / 4; + last_bytes = INTVAL (operands[2]) & 3; + dstoffset = srcoffset = 0; + + if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0) + part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); + + for (i = 0; in_words_to_go >= 2; i+=4) + { + if (in_words_to_go > 4) + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src, + TRUE, srcbase, &srcoffset)); + else + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go, + src, FALSE, srcbase, + &srcoffset)); + + if (out_words_to_go) + { + if (out_words_to_go > 4) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst, + TRUE, dstbase, &dstoffset)); + else if (out_words_to_go != 1) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, + out_words_to_go, dst, + (last_bytes == 0 + ? FALSE : TRUE), + dstbase, &dstoffset)); + else + { + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, gen_rtx_REG (SImode, 0)); + if (last_bytes != 0) + { + emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); + dstoffset += 4; + } + } + } + + in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4; + out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4; + } + + /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */ + if (out_words_to_go) + { + rtx sreg; + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + sreg = copy_to_reg (mem); + + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, sreg); + in_words_to_go--; + + gcc_assert (!in_words_to_go); /* Sanity check */ + } + + if (in_words_to_go) + { + gcc_assert (in_words_to_go > 0); + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + part_bytes_reg = copy_to_mode_reg (SImode, mem); + } + + gcc_assert (!last_bytes || part_bytes_reg); + + if (BYTES_BIG_ENDIAN && last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + + /* The bytes we want are in the top end of the word. 
*/ + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, + GEN_INT (8 * (4 - last_bytes)))); + part_bytes_reg = tmp; + + while (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, + plus_constant (Pmode, dst, + last_bytes - 1), + dstoffset + last_bytes - 1); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + + if (--last_bytes) + { + tmp = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8))); + part_bytes_reg = tmp; + } + } + + } + else + { + if (last_bytes > 1) + { + mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg)); + last_bytes -= 2; + if (last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (dst, dst, const2_rtx)); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16))); + part_bytes_reg = tmp; + dstoffset += 2; + } + } + + if (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + } + } + + return 1; +} + +/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx +by mode size. */ +inline static rtx +next_consecutive_mem (rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + HOST_WIDE_INT offset = GET_MODE_SIZE (mode); + rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); + + return adjust_automodify_address (mem, mode, addr, offset); +} + +/* Copy using LDRD/STRD instructions whenever possible. + Returns true upon success. */ +bool +gen_movmem_ldrd_strd (rtx *operands) +{ + unsigned HOST_WIDE_INT len; + HOST_WIDE_INT align; + rtx src, dst, base; + rtx reg0; + bool src_aligned, dst_aligned; + bool src_volatile, dst_volatile; + + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (CONST_INT_P (operands[3])); + + len = UINTVAL (operands[2]); + if (len > 64) + return false; + + /* Maximum alignment we can assume for both src and dst buffers. */ + align = INTVAL (operands[3]); + + if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0)) + return false; + + /* Place src and dst addresses in registers + and update the corresponding mem rtx. */ + dst = operands[0]; + dst_volatile = MEM_VOLATILE_P (dst); + dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + src = operands[1]; + src_volatile = MEM_VOLATILE_P (src); + src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + if (!unaligned_access && !(src_aligned && dst_aligned)) + return false; + + if (src_volatile || dst_volatile) + return false; + + /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ + if (!(dst_aligned || src_aligned)) + return arm_gen_movmemqi (operands); + + src = adjust_address (src, DImode, 0); + dst = adjust_address (dst, DImode, 0); + while (len >= 8) + { + len -= 8; + reg0 = gen_reg_rtx (DImode); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loaddi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storedi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + } + + gcc_assert (len < 8); + if (len >= 4) + { + /* More than a word but less than a double-word to copy. Copy a word. 
*/ + reg0 = gen_reg_rtx (SImode); + src = adjust_address (src, SImode, 0); + dst = adjust_address (dst, SImode, 0); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loadsi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storesi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + len -= 4; + } + + if (len == 0) + return true; + + /* Copy the remaining bytes. */ + if (len >= 2) + { + dst = adjust_address (dst, HImode, 0); + src = adjust_address (src, HImode, 0); + reg0 = gen_reg_rtx (SImode); + if (src_aligned) + emit_insn (gen_zero_extendhisi2 (reg0, src)); + else + emit_insn (gen_unaligned_loadhiu (reg0, src)); + + if (dst_aligned) + emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0))); + else + emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0))); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + if (len == 2) + return true; + } + + dst = adjust_address (dst, QImode, 0); + src = adjust_address (src, QImode, 0); + reg0 = gen_reg_rtx (QImode); + emit_move_insn (reg0, src); + emit_move_insn (dst, reg0); + return true; +} + +/* Select a dominance comparison mode if possible for a test of the general + form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. + COND_OR == DOM_CC_X_AND_Y => (X && Y) + COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y) + COND_OR == DOM_CC_X_OR_Y => (X || Y) + In all cases OP will be either EQ or NE, but we don't need to know which + here. If we are unable to support a dominance comparison we return + CC mode. This will then fail to match for the RTL expressions that + generate this call. */ +enum machine_mode +arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) +{ + enum rtx_code cond1, cond2; + int swapped = 0; + + /* Currently we will probably get the wrong result if the individual + comparisons are not simple. This also ensures that it is safe to + reverse a comparison if necessary. */ + if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) + != CCmode) + || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) + != CCmode)) + return CCmode; + + /* The if_then_else variant of this tests the second condition if the + first passes, but is true if the first fails. Reverse the first + condition to get a true "inclusive-or" expression. */ + if (cond_or == DOM_CC_NX_OR_Y) + cond1 = reverse_condition (cond1); + + /* If the comparisons are not equal, and one doesn't dominate the other, + then we can't do this. 
*/ + if (cond1 != cond2 + && !comparison_dominates_p (cond1, cond2) + && (swapped = 1, !comparison_dominates_p (cond2, cond1))) + return CCmode; + + if (swapped) + { + enum rtx_code temp = cond1; + cond1 = cond2; + cond2 = temp; + } + + switch (cond1) + { + case EQ: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DEQmode; + + switch (cond2) + { + case EQ: return CC_DEQmode; + case LE: return CC_DLEmode; + case LEU: return CC_DLEUmode; + case GE: return CC_DGEmode; + case GEU: return CC_DGEUmode; + default: gcc_unreachable (); + } + + case LT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTmode; + + switch (cond2) + { + case LT: + return CC_DLTmode; + case LE: + return CC_DLEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTmode; + + switch (cond2) + { + case GT: + return CC_DGTmode; + case GE: + return CC_DGEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case LTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTUmode; + + switch (cond2) + { + case LTU: + return CC_DLTUmode; + case LEU: + return CC_DLEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTUmode; + + switch (cond2) + { + case GTU: + return CC_DGTUmode; + case GEU: + return CC_DGEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + /* The remaining cases only occur when both comparisons are the + same. */ + case NE: + gcc_assert (cond1 == cond2); + return CC_DNEmode; + + case LE: + gcc_assert (cond1 == cond2); + return CC_DLEmode; + + case GE: + gcc_assert (cond1 == cond2); + return CC_DGEmode; + + case LEU: + gcc_assert (cond1 == cond2); + return CC_DLEUmode; + + case GEU: + gcc_assert (cond1 == cond2); + return CC_DGEUmode; + + default: + gcc_unreachable (); + } +} + +enum machine_mode +arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + /* All floating point compares return CCFP if it is an equality + comparison, and CCFPE otherwise. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (op) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + + /* A compare with a shifted operand. Because of canonicalization, the + comparison will have to be swapped when we emit the assembler. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE + || GET_CODE (x) == ROTATERT)) + return CC_SWPmode; + + /* This operation is performed swapped, but since we only rely on the Z + flag we don't need an additional mode. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && GET_CODE (x) == NEG + && (op == EQ || op == NE)) + return CC_Zmode; + + /* This is a special case that is used by combine to allow a + comparison of a shifted byte load to be split into a zero-extend + followed by a comparison of the shifted integer (only valid for + equalities and unsigned inequalities). 
*/ + if (GET_MODE (x) == SImode + && GET_CODE (x) == ASHIFT + && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24 + && GET_CODE (XEXP (x, 0)) == SUBREG + && MEM_P (SUBREG_REG (XEXP (x, 0))) + && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode + && (op == EQ || op == NE + || op == GEU || op == GTU || op == LTU || op == LEU) + && CONST_INT_P (y)) + return CC_Zmode; + + /* A construct for a conditional compare, if the false arm contains + 0, then both conditions must be true, otherwise either condition + must be true. Not all conditions are possible, so CCmode is + returned if it can't be done. */ + if (GET_CODE (x) == IF_THEN_ELSE + && (XEXP (x, 2) == const0_rtx + || XEXP (x, 2) == const1_rtx) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + INTVAL (XEXP (x, 2))); + + /* Alternate canonicalizations of the above. These are somewhat cleaner. */ + if (GET_CODE (x) == AND + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_AND_Y); + + if (GET_CODE (x) == IOR + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_OR_Y); + + /* An operation (on Thumb) where we want to test for a single bit. + This is done by shifting that bit up into the top bit of a + scratch register; we can then branch on the sign bit. */ + if (TARGET_THUMB1 + && GET_MODE (x) == SImode + && (op == EQ || op == NE) + && GET_CODE (x) == ZERO_EXTRACT + && XEXP (x, 1) == const1_rtx) + return CC_Nmode; + + /* An operation that sets the condition codes as a side-effect, the + V flag is not set correctly, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */ + if (GET_MODE (x) == SImode + && y == const0_rtx + && (op == EQ || op == NE || op == LT || op == GE) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS + || GET_CODE (x) == AND || GET_CODE (x) == IOR + || GET_CODE (x) == XOR || GET_CODE (x) == MULT + || GET_CODE (x) == NOT || GET_CODE (x) == NEG + || GET_CODE (x) == LSHIFTRT + || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == ROTATERT + || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT))) + return CC_NOOVmode; + + if (GET_MODE (x) == QImode && (op == EQ || op == NE)) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (GET_MODE (x) == DImode || GET_MODE (y) == DImode) + { + switch (op) + { + case EQ: + case NE: + /* A DImode comparison against zero can be implemented by + or'ing the two halves together. */ + if (y == const0_rtx) + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ + if (!TARGET_32BIT) + return CC_Zmode; + + /* FALLTHROUGH */ + + case LTU: + case LEU: + case GTU: + case GEU: + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. */ + if (TARGET_32BIT) + return CC_CZmode; + + /* FALLTHROUGH */ + + case LT: + case LE: + case GT: + case GE: + /* DImode signed and unsigned comparisons can be implemented + by cmp + sbcs with a scratch register, but that does not + set the Z flag - we must reverse GT/LE/GTU/LEU. 
*/ + gcc_assert (op != EQ && op != NE); + return CC_NCVmode; + + default: + gcc_unreachable (); + } + } + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) + return GET_MODE (x); + + return CCmode; +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. FP means this is a + floating point compare: I don't think that it is needed on the arm. */ +rtx +arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch) +{ + enum machine_mode mode; + rtx cc_reg; + int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; + + /* We might have X as a constant, Y as a register because of the predicates + used for cmpdi. If so, force X to a register here. */ + if (dimode_comparison && !REG_P (x)) + x = force_reg (DImode, x); + + mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + if (dimode_comparison + && mode != CC_CZmode) + { + rtx clobber, set; + + /* To compare two non-zero values for equality, XOR them and + then compare against zero. Not used for ARM mode; there + CC_CZmode is cheaper. */ + if (mode == CC_Zmode && y != const0_rtx) + { + gcc_assert (!reload_completed); + x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); + y = const0_rtx; + } + + /* A scratch register is required. */ + if (reload_completed) + gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode); + else + scratch = gen_rtx_SCRATCH (SImode); + + clobber = gen_rtx_CLOBBER (VOIDmode, scratch); + set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + } + else + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + + return cc_reg; +} + +/* Generate a sequence of insns that will generate the correct return + address mask depending on the physical architecture that the program + is running on. */ +rtx +arm_gen_return_addr_mask (void) +{ + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_return_addr_mask (reg)); + return reg; +} + +void +arm_reload_in_hi (rtx *operands) +{ + rtx ref = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (REG_P (ref)) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem (REGNO (ref))) + { + ref = reg_equiv_mem (REGNO (ref)); + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address (REGNO (ref)); + } + else + base = find_replacement (&XEXP (ref, 0)); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1)))) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? 
(offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + /* Operands[2] may overlap operands[0] (though it won't overlap + operands[1]), that's why we asked for a DImode reg -- so we can + use the bit that does not overlap. */ + if (REGNO (operands[2]) == REGNO (operands[0])) + scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + else + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + emit_insn (gen_zero_extendqisi2 (scratch, + gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset)))); + emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)))); + if (!BYTES_BIG_ENDIAN) + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT + (SImode, + gen_rtx_SUBREG (SImode, operands[0], 0), + GEN_INT (8)), + scratch)); + else + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT (SImode, scratch, + GEN_INT (8)), + gen_rtx_SUBREG (SImode, operands[0], 0))); +} + +/* Handle storing a half-word to memory during reload by synthesizing as two + byte stores. Take care not to clobber the input values until after we + have moved them somewhere safe. This code assumes that if the DImode + scratch in operands[2] overlaps either the input value or output address + in some way, then that value must die in this insn (we absolutely need + two scratch registers for some corner cases). */ +void +arm_reload_out_hi (rtx *operands) +{ + rtx ref = operands[0]; + rtx outval = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (REG_P (ref)) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem (REGNO (ref))) + { + ref = reg_equiv_mem (REGNO (ref)); + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address (REGNO (ref)); + } + else + base = find_replacement (&XEXP (ref, 0)); + + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1)))) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we can + swap the scratch and base_plus. 
*/ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy OUTVAL into the scratch now, + this should only be necessary if outval is a subreg + of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it can, + since scratch is known to overlap with OUTVAL, and + must be wider than a word. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? (offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we + can swap the scratch and base_plus. */ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy outval into scratch now, + this should only be necessary if outval is a + subreg of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it + can, since scratch is known to overlap with + outval. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base, + offset)), + gen_lowpart (QImode, scratch))); + } + else + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base, + offset)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)), + gen_lowpart (QImode, scratch))); + } +} + +/* Return true if a type must be passed in memory. For AAPCS, small aggregates + (padded to the size of a word) should be passed in a register. */ + +static bool +arm_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (TARGET_AAPCS_BASED) + return must_pass_in_stack_var_size (mode, type); + else + return must_pass_in_stack_var_size_or_pad (mode, type); +} + + +/* For use by FUNCTION_ARG_PADDING (MODE, TYPE). 
+   Return true if an argument passed on the stack should be padded upwards,
+   i.e. if the least-significant byte has useful data.
+   For legacy APCS ABIs we use the default. For AAPCS based ABIs small
+   aggregate types are placed in the lowest memory address. */
+
+bool
+arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
+{
+  if (!TARGET_AAPCS_BASED)
+    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
+
+  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
+    return false;
+
+  return true;
+}
+
+
+/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
+   Return !BYTES_BIG_ENDIAN if the least significant byte of the
+   register has useful data, and return the opposite if the most
+   significant byte does. */
+
+bool
+arm_pad_reg_upward (enum machine_mode mode,
+                    tree type, int first ATTRIBUTE_UNUSED)
+{
+  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
+    {
+      /* For AAPCS, small aggregates, small fixed-point types,
+         and small complex types are always padded upwards. */
+      if (type)
+        {
+          if ((AGGREGATE_TYPE_P (type)
+               || TREE_CODE (type) == COMPLEX_TYPE
+               || FIXED_POINT_TYPE_P (type))
+              && int_size_in_bytes (type) <= 4)
+            return true;
+        }
+      else
+        {
+          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
+              && GET_MODE_SIZE (mode) <= 4)
+            return true;
+        }
+    }
+
+  /* Otherwise, use default padding. */
+  return !BYTES_BIG_ENDIAN;
+}
+
+/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
+   assuming that the address in the base register is word aligned. */
+bool
+offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
+{
+  HOST_WIDE_INT max_offset;
+
+  /* Offset must be a multiple of 4 in Thumb mode. */
+  if (TARGET_THUMB2 && ((offset & 3) != 0))
+    return false;
+
+  if (TARGET_THUMB2)
+    max_offset = 1020;
+  else if (TARGET_ARM)
+    max_offset = 255;
+  else
+    return false;
+
+  return ((offset <= max_offset) && (offset >= -max_offset));
+}
+
+/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
+   Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
+   Assumes that the address in the base register RN is word aligned. Pattern
+   guarantees that both memory accesses use the same base register, the
+   offsets are constants within the range, and the gap between the offsets
+   is 4. If reload is complete, check that the registers are legal. WBACK
+   indicates whether the address is updated. LOAD indicates whether the
+   memory access is a load or a store. */
+bool
+operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
+                       bool wback, bool load)
+{
+  unsigned int t, t2, n;
+
+  if (!reload_completed)
+    return true;
+
+  if (!offset_ok_for_ldrd_strd (offset))
+    return false;
+
+  t = REGNO (rt);
+  t2 = REGNO (rt2);
+  n = REGNO (rn);
+
+  if ((TARGET_THUMB2)
+      && ((wback && (n == t || n == t2))
+          || (t == SP_REGNUM)
+          || (t == PC_REGNUM)
+          || (t2 == SP_REGNUM)
+          || (t2 == PC_REGNUM)
+          || (!load && (n == PC_REGNUM))
+          || (load && (t == t2))
+          /* Triggers Cortex-M3 LDRD errata. */
+          || (!wback && load && fix_cm3_ldrd && (n == t))))
+    return false;
+
+  if ((TARGET_ARM)
+      && ((wback && (n == t || n == t2))
+          || (t2 == PC_REGNUM)
+          || (t % 2 != 0)  /* First destination register is not even. */
+          || (t2 != t + 1)
+          /* PC can be used as base register (for offset addressing only),
+             but it is deprecated. */
+          || (n == PC_REGNUM)))
+    return false;
+
+  return true;
+}
+
+/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
+   operand MEM's address contains an immediate offset from the base
+   register and has no side effects, in which case it sets BASE and
+   OFFSET accordingly. */
+static bool
+mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (base != NULL && offset != NULL);
+
+  /* TODO: Handle more general memory operand patterns, such as
+     PRE_DEC and PRE_INC. */
+
+  if (side_effects_p (mem))
+    return false;
+
+  /* Can't deal with subregs. */
+  if (GET_CODE (mem) == SUBREG)
+    return false;
+
+  gcc_assert (MEM_P (mem));
+
+  *offset = const0_rtx;
+
+  addr = XEXP (mem, 0);
+
+  /* If addr isn't valid for DImode, then we can't handle it. */
+  if (!arm_legitimate_address_p (DImode, addr,
+                                 reload_in_progress || reload_completed))
+    return false;
+
+  if (REG_P (addr))
+    {
+      *base = addr;
+      return true;
+    }
+  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);
+      return (REG_P (*base) && CONST_INT_P (*offset));
+    }
+
+  return false;
+}
+
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
+
+/* Called from a peephole2 to replace two word-size accesses with a
+   single LDRD/STRD instruction. Returns true iff we can generate a
+   new instruction sequence. That is, both accesses use the same base
+   register and the gap between constant offsets is 4. This function
+   may reorder its operands to match ldrd/strd RTL templates.
+   OPERANDS are the operands found by the peephole matcher;
+   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
+   corresponding memory operands. LOAD indicates whether the access
+   is load or store. CONST_STORE indicates a store of constant
+   integer values held in OPERANDS[4,5] and assumes that the pattern
+   is 4 insns long, for the purpose of checking dead registers.
+   COMMUTE indicates that register operands may be reordered. */
+bool
+gen_operands_ldrd_strd (rtx *operands, bool load,
+                        bool const_store, bool commute)
+{
+  int nops = 2;
+  HOST_WIDE_INT offsets[2], offset;
+  rtx base = NULL_RTX;
+  rtx cur_base, cur_offset, tmp;
+  int i, gap;
+  HARD_REG_SET regset;
+
+  gcc_assert (!const_store || !load);
+  /* Check that the memory references are immediate offsets from the
+     same base register. Extract the base register, the destination
+     registers, and the corresponding memory offsets. */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
+        return false;
+
+      if (i == 0)
+        base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+        return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+        {
+          tmp = SUBREG_REG (operands[i]);
+          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+          operands[i] = tmp;
+        }
+    }
+
+  /* Make sure there is no dependency between the individual loads. */
+  if (load && REGNO (operands[0]) == REGNO (base))
+    return false; /* RAW */
+
+  if (load && REGNO (operands[0]) == REGNO (operands[1]))
+    return false; /* WAW */
+
+  /* If the same input register is used in both stores
+     when storing different constants, try to find a free register.
+     For example, the code
+        mov r0, 0
+        str r0, [r2]
+        mov r0, 1
+        str r0, [r2, #4]
+     can be transformed into
+        mov r1, 0
+        strd r1, r0, [r2]
+     in Thumb mode assuming that r1 is free.
*/ + if (const_store + && REGNO (operands[0]) == REGNO (operands[1]) + && INTVAL (operands[4]) != INTVAL (operands[5])) + { + if (TARGET_THUMB2) + { + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* Use the new register in the first load to ensure that + if the original input register is not dead after peephole, + then it will have the correct constant value. */ + operands[0] = tmp; + } + else if (TARGET_ARM) + { + return false; + int regno = REGNO (operands[0]); + if (!peep2_reg_dead_p (4, operands[0])) + { + /* When the input register is even and is not dead after the + pattern, it has to hold the second constant but we cannot + form a legal STRD in ARM mode with this register as the second + register. */ + if (regno % 2 == 0) + return false; + + /* Is regno-1 free? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + operands[0] = tmp; + } + else + { + /* Find a DImode register. */ + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp != NULL_RTX) + { + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + } + else + { + /* Can we use the input register to form a DI register? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, + regno % 2 == 0 ? regno + 1 : regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + operands[regno % 2 == 1 ? 0 : 1] = tmp; + } + } + + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1); + } + } + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + SWAP_RTX (operands[0], operands[1]); + SWAP_RTX (operands[2], operands[3]); + if (const_store) + SWAP_RTX (operands[4], operands[5]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. */ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + + /* In Thumb state, where registers are almost unconstrained, there + is little hope to fix it. */ + if (TARGET_THUMB2) + return false; + + if (load && commute) + { + /* Try reordering registers. */ + SWAP_RTX (operands[0], operands[1]); + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + } + + if (const_store) + { + /* If input registers are dead after this pattern, they can be + reordered or replaced by other registers that are free in the + current pattern. */ + if (!peep2_reg_dead_p (4, operands[0]) + || !peep2_reg_dead_p (4, operands[1])) + return false; + + /* Try to reorder the input registers. 
*/ + /* For example, the code + mov r0, 0 + mov r1, 1 + str r1, [r2] + str r0, [r2, #4] + can be transformed into + mov r1, 0 + mov r0, 1 + strd r0, [r2] + */ + if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset, + false, false)) + { + SWAP_RTX (operands[0], operands[1]); + return true; + } + + /* Try to find a free DI register. */ + CLEAR_HARD_REG_SET (regset); + add_to_hard_reg_set (®set, SImode, REGNO (operands[0])); + add_to_hard_reg_set (®set, SImode, REGNO (operands[1])); + while (true) + { + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* DREG must be an even-numbered register in DImode. + Split it into SI registers. */ + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1])); + + return (operands_ok_ldrd_strd (operands[0], operands[1], + base, offset, + false, load)); + } + } + + return false; +} +#undef SWAP_RTX + + + + +/* Print a symbolic form of X to the debug file, F. */ +static void +arm_print_value (FILE *f, rtx x) +{ + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); + return; + + case CONST_DOUBLE: + fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3)); + return; + + case CONST_VECTOR: + { + int i; + + fprintf (f, "<"); + for (i = 0; i < CONST_VECTOR_NUNITS (x); i++) + { + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i))); + if (i < (CONST_VECTOR_NUNITS (x) - 1)) + fputc (',', f); + } + fprintf (f, ">"); + } + return; + + case CONST_STRING: + fprintf (f, "\"%s\"", XSTR (x, 0)); + return; + + case SYMBOL_REF: + fprintf (f, "`%s'", XSTR (x, 0)); + return; + + case LABEL_REF: + fprintf (f, "L%d", INSN_UID (XEXP (x, 0))); + return; + + case CONST: + arm_print_value (f, XEXP (x, 0)); + return; + + case PLUS: + arm_print_value (f, XEXP (x, 0)); + fprintf (f, "+"); + arm_print_value (f, XEXP (x, 1)); + return; + + case PC: + fprintf (f, "pc"); + return; + + default: + fprintf (f, "????"); + return; + } +} + +/* Routines for manipulation of the constant pool. */ + +/* Arm instructions cannot load a large constant directly into a + register; they have to come from a pc relative load. The constant + must therefore be placed in the addressable range of the pc + relative load. Depending on the precise pc relative load + instruction the range is somewhere between 256 bytes and 4k. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow + things down and make the code larger. + + Normally we can hide the table after an existing unconditional + branch so that there is no interruption of the flow, but in the + worst case the code looks like this: + + ldr rn, L1 + ... + b L2 + align + L1: .long value + L2: + ... + + ldr rn, L3 + ... + b L4 + align + L3: .long value + L4: + ... + + We fix this by performing a scan after scheduling, which notices + which instructions need to have their operands fetched from the + constant table and builds the table. + + The algorithm starts by building a table of all the constants that + need fixing up and all the natural barriers in the function (places + where a constant table can be dropped without breaking the flow). 
+ For each fixup we note how far the pc-relative replacement will be + able to reach and the offset of the instruction into the function. + + Having built the table we then group the fixes together to form + tables that are as large as possible (subject to addressing + constraints) and emit each table of constants after the last + barrier that is within range of all the instructions in the group. + If a group does not contain a barrier, then we forcibly create one + by inserting a jump instruction into the flow. Once the table has + been inserted, the insns are then modified to reference the + relevant entry in the pool. + + Possible enhancements to the algorithm (not implemented) are: + + 1) For some processors and object formats, there may be benefit in + aligning the pools to the start of cache lines; this alignment + would need to be taken into account when calculating addressability + of a pool. */ + +/* These typedefs are located at the start of this file, so that + they can be used in the prototypes there. This comment is to + remind readers of that fact so that the following structures + can be understood more easily. + + typedef struct minipool_node Mnode; + typedef struct minipool_fixup Mfix; */ + +struct minipool_node +{ + /* Doubly linked chain of entries. */ + Mnode * next; + Mnode * prev; + /* The maximum offset into the code that this entry can be placed. While + pushing fixes for forward references, all entries are sorted in order + of increasing max_address. */ + HOST_WIDE_INT max_address; + /* Similarly for an entry inserted for a backwards ref. */ + HOST_WIDE_INT min_address; + /* The number of fixes referencing this entry. This can become zero + if we "unpush" an entry. In this case we ignore the entry when we + come to emit the code. */ + int refcount; + /* The offset from the start of the minipool. */ + HOST_WIDE_INT offset; + /* The value in table. */ + rtx value; + /* The mode of value. */ + enum machine_mode mode; + /* The size of the value. With iWMMXt enabled + sizes > 4 also imply an alignment of 8-bytes. */ + int fix_size; +}; + +struct minipool_fixup +{ + Mfix * next; + rtx insn; + HOST_WIDE_INT address; + rtx * loc; + enum machine_mode mode; + int fix_size; + rtx value; + Mnode * minipool; + HOST_WIDE_INT forwards; + HOST_WIDE_INT backwards; +}; + +/* Fixes less than a word need padding out to a word boundary. */ +#define MINIPOOL_FIX_SIZE(mode) \ + (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4) + +static Mnode * minipool_vector_head; +static Mnode * minipool_vector_tail; +static rtx minipool_vector_label; +static int minipool_pad; + +/* The linked list of all minipool fixes required for this function. */ +Mfix * minipool_fix_head; +Mfix * minipool_fix_tail; +/* The fix entry for the current minipool, once it has been placed. */ +Mfix * minipool_barrier; + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 0 +#endif + +static HOST_WIDE_INT +get_jump_table_size (rtx insn) +{ + /* ADDR_VECs only take room if read-only data does into the text + section. */ + if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) + { + rtx body = PATTERN (insn); + int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0; + HOST_WIDE_INT size; + HOST_WIDE_INT modesize; + + modesize = GET_MODE_SIZE (GET_MODE (body)); + size = modesize * XVECLEN (body, elt); + switch (modesize) + { + case 1: + /* Round up size of TBB table to a halfword boundary. 
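Reachability throughout this code is plain interval arithmetic: a fix at address A whose instruction can reach FORWARDS bytes ahead and BACKWARDS bytes behind can use a pool entry only if the entry's final address lands in [A - BACKWARDS, A + FORWARDS]. A standalone sketch of that test, with hypothetical field names and illustrative range values:

    #include <stdbool.h>

    /* A pending constant-pool reference: where the load sits and how far its
       addressing mode can reach in each direction (illustrative values).  */
    struct pool_fix
    {
      long address;    /* byte offset of the load within the function */
      long forwards;   /* maximum positive displacement */
      long backwards;  /* maximum negative displacement */
    };

    /* True if a pool entry placed at POOL_ADDRESS is addressable from FIX.  */
    static bool
    pool_entry_in_range (const struct pool_fix *fix, long pool_address)
    {
      return pool_address >= fix->address - fix->backwards
             && pool_address <= fix->address + fix->forwards;
    }

    int
    main (void)
    {
      struct pool_fix fix = { 1000, 4092, 4084 };
      /* A pool 2000 bytes later is reachable; one 5000 bytes later is not.  */
      return (pool_entry_in_range (&fix, 3000)
              && !pool_entry_in_range (&fix, 6000)) ? 0 : 1;
    }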
*/ + size = (size + 1) & ~(HOST_WIDE_INT)1; + break; + case 2: + /* No padding necessary for TBH. */ + break; + case 4: + /* Add two bytes for alignment on Thumb. */ + if (TARGET_THUMB) + size += 2; + break; + default: + gcc_unreachable (); + } + return size; + } + + return 0; +} + +/* Return the maximum amount of padding that will be inserted before + label LABEL. */ + +static HOST_WIDE_INT +get_label_padding (rtx label) +{ + HOST_WIDE_INT align, min_insn_size; + + align = 1 << label_to_alignment (label); + min_insn_size = TARGET_THUMB ? 2 : 4; + return align > min_insn_size ? align - min_insn_size : 0; +} + +/* Move a minipool fix MP from its current location to before MAX_MP. + If MAX_MP is NULL, then MP doesn't need moving, but the addressing + constraints may need updating. */ +static Mnode * +move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp, + HOST_WIDE_INT max_address) +{ + /* The code below assumes these are different. */ + gcc_assert (mp != max_mp); + + if (max_mp == NULL) + { + if (max_address < mp->max_address) + mp->max_address = max_address; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + /* Unlink MP from its current position. Since max_mp is non-null, + mp->prev must be non-null. */ + mp->prev->next = mp->next; + if (mp->next != NULL) + mp->next->prev = mp->prev; + else + minipool_vector_tail = mp->prev; + + /* Re-insert it before MAX_MP. */ + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +/* Add a constant to the minipool for a forward reference. Returns the + node added or NULL if the constant will not fit in this pool. */ +static Mnode * +add_minipool_forward_ref (Mfix *fix) +{ + /* If set, max_mp is the first pool_entry that has a lower + constraint than the one we are trying to add. */ + Mnode * max_mp = NULL; + HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad; + Mnode * mp; + + /* If the minipool starts before the end of FIX->INSN then this FIX + can not be placed into the current pool. Furthermore, adding the + new constant pool entry may cause the pool to start FIX_SIZE bytes + earlier. */ + if (minipool_vector_head && + (fix->address + get_attr_length (fix->insn) + >= minipool_vector_head->max_address - fix->fix_size)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (!LABEL_P (fix->value) + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value)) + { + /* More than one fix references this entry. */ + mp->refcount++; + return move_minipool_fix_forward_ref (mp, max_mp, max_address); + } + + /* Note the insertion point if necessary. 
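The table-size arithmetic just above is easy to check in isolation: entries are 1, 2 or 4 bytes (TBB, TBH, or a word table), TBB tables are rounded up to a halfword, and word tables on Thumb get two extra alignment bytes. A standalone sketch of the same computation:

    #include <assert.h>

    /* ELT_SIZE bytes per dispatch-table entry, N_ENTRIES entries, plus the
       padding rules applied above.  THUMB selects the Thumb-specific case.  */
    static long
    jump_table_bytes (int elt_size, long n_entries, int thumb)
    {
      long size = (long) elt_size * n_entries;

      switch (elt_size)
        {
        case 1:                       /* TBB: round up to a halfword.  */
          size = (size + 1) & ~1L;
          break;
        case 2:                       /* TBH: already halfword aligned.  */
          break;
        case 4:                       /* Word table: two alignment bytes on Thumb.  */
          if (thumb)
            size += 2;
          break;
        default:
          assert (0);
        }
      return size;
    }

    int
    main (void)
    {
      /* Five byte-sized entries occupy 6 bytes once rounded to a halfword.  */
      return jump_table_bytes (1, 5, 1) == 6 ? 0 : 1;
    }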
*/ + if (max_mp == NULL + && mp->max_address > max_address) + max_mp = mp; + + /* If we are inserting an 8-bytes aligned quantity and + we have not already found an insertion point, then + make sure that all such 8-byte aligned quantities are + placed at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && max_mp == NULL + && fix->fix_size >= 8 + && mp->fix_size < 8) + { + max_mp = mp; + max_address = mp->max_address; + } + } + + /* The value is not currently in the minipool, so we need to create + a new entry for it. If MAX_MP is NULL, the entry will be put on + the end of the list since the placement is less constrained than + any existing entry. Otherwise, we insert the new fix before + MAX_MP and, if necessary, adjust the constraints on the other + entries. */ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + /* Not yet required for a backwards ref. */ + mp->min_address = -65536; + + if (max_mp == NULL) + { + mp->max_address = max_address; + mp->next = NULL; + mp->prev = minipool_vector_tail; + + if (mp->prev == NULL) + { + minipool_vector_head = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->prev->next = mp; + + minipool_vector_tail = mp; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +static Mnode * +move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp, + HOST_WIDE_INT min_address) +{ + HOST_WIDE_INT offset; + + /* The code below assumes these are different. */ + gcc_assert (mp != min_mp); + + if (min_mp == NULL) + { + if (min_address > mp->min_address) + mp->min_address = min_address; + } + else + { + /* We will adjust this below if it is too loose. */ + mp->min_address = min_address; + + /* Unlink MP from its current position. Since min_mp is non-null, + mp->next must be non-null. */ + mp->next->prev = mp->prev; + if (mp->prev != NULL) + mp->prev->next = mp->next; + else + minipool_vector_head = mp->next; + + /* Reinsert it after MIN_MP. */ + mp->prev = min_mp; + mp->next = min_mp->next; + min_mp->next = mp; + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + min_mp = mp; + + offset = 0; + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + if (mp->refcount > 0) + offset += mp->fix_size; + + if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + } + + return min_mp; +} + +/* Add a constant to the minipool for a backward reference. Returns the + node added or NULL if the constant will not fit in this pool. + + Note that the code for insertion for a backwards reference can be + somewhat confusing because the calculated offsets for each fix do + not take into account the size of the pool (which is still under + construction. 
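When an entry is inserted or moved, the constraint has to ripple backwards: every earlier entry must still fit in front of its successor, so its max_address gets clamped to the successor's max_address minus its own size. A standalone sketch of that propagation over a plain array instead of the doubly linked Mnode list:

    #include <stdio.h>

    /* One pool entry: its size in bytes and the highest address at which it
       may still be placed (a simplified stand-in for the Mnode fields).  */
    struct pool_entry { long fix_size; long max_address; };

    /* After tightening the constraint on entry LAST, walk towards the start
       of the pool and shrink earlier entries' max_address so that each still
       fits in front of its successor, mirroring the loops above.  */
    static void
    propagate_max_address (struct pool_entry *pool, int last)
    {
      for (int i = last; i > 0; i--)
        if (pool[i - 1].max_address > pool[i].max_address - pool[i - 1].fix_size)
          pool[i - 1].max_address = pool[i].max_address - pool[i - 1].fix_size;
    }

    int
    main (void)
    {
      struct pool_entry pool[3] = { { 4, 5000 }, { 8, 5000 }, { 4, 4000 } };

      propagate_max_address (pool, 2);
      /* Entry 1 must now end by 4000 - 8, entry 0 by that minus 4 again.  */
      printf ("%ld %ld\n", pool[0].max_address, pool[1].max_address);
      return (pool[0].max_address == 3988 && pool[1].max_address == 3992) ? 0 : 1;
    }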
*/ +static Mnode * +add_minipool_backward_ref (Mfix *fix) +{ + /* If set, min_mp is the last pool_entry that has a lower constraint + than the one we are trying to add. */ + Mnode *min_mp = NULL; + /* This can be negative, since it is only a constraint. */ + HOST_WIDE_INT min_address = fix->address - fix->backwards; + Mnode *mp; + + /* If we can't reach the current pool from this insn, or if we can't + insert this entry at the end of the pool without pushing other + fixes out of range, then we don't try. This ensures that we + can't fail later on. */ + if (min_address >= minipool_barrier->address + || (minipool_vector_tail->min_address + fix->fix_size + >= minipool_barrier->address)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (!LABEL_P (fix->value) + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value) + /* Check that there is enough slack to move this entry to the + end of the table (this is conservative). */ + && (mp->max_address + > (minipool_barrier->address + + minipool_vector_tail->offset + + minipool_vector_tail->fix_size))) + { + mp->refcount++; + return move_minipool_fix_backward_ref (mp, min_mp, min_address); + } + + if (min_mp != NULL) + mp->min_address += fix->fix_size; + else + { + /* Note the insertion point if necessary. */ + if (mp->min_address < min_address) + { + /* For now, we do not allow the insertion of 8-byte alignment + requiring nodes anywhere but at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + min_mp = mp; + } + else if (mp->max_address + < minipool_barrier->address + mp->offset + fix->fix_size) + { + /* Inserting before this entry would push the fix beyond + its maximum address (which can happen if we have + re-located a forwards fix); force the new fix to come + after it. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + /* Do not insert a non-8-byte aligned quantity before 8-byte + aligned quantities. */ + else if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size < 8 + && mp->fix_size >= 8) + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + } + + /* We need to create a new entry. */ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + mp->max_address = minipool_barrier->address + 65536; + + mp->min_address = min_address; + + if (min_mp == NULL) + { + mp->prev = NULL; + mp->next = minipool_vector_head; + + if (mp->next == NULL) + { + minipool_vector_tail = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->next->prev = mp; + + minipool_vector_head = mp; + } + else + { + mp->next = min_mp->next; + mp->prev = min_mp; + min_mp->next = mp; + + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + /* Save the new entry. */ + min_mp = mp; + + if (mp->prev) + mp = mp->prev; + else + mp->offset = 0; + + /* Scan over the following entries and adjust their offsets. 
*/ + while (mp->next != NULL) + { + if (mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + + if (mp->refcount) + mp->next->offset = mp->offset + mp->fix_size; + else + mp->next->offset = mp->offset; + + mp = mp->next; + } + + return min_mp; +} + +static void +assign_minipool_offsets (Mfix *barrier) +{ + HOST_WIDE_INT offset = 0; + Mnode *mp; + + minipool_barrier = barrier; + + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + + if (mp->refcount > 0) + offset += mp->fix_size; + } +} + +/* Output the literal table */ +static void +dump_minipool (rtx scan) +{ + Mnode * mp; + Mnode * nmp; + int align64 = 0; + + if (ARM_DOUBLEWORD_ALIGN) + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + if (mp->refcount > 0 && mp->fix_size >= 8) + { + align64 = 1; + break; + } + + if (dump_file) + fprintf (dump_file, + ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n", + INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4); + + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan); + scan = emit_label_after (minipool_vector_label, scan); + + for (mp = minipool_vector_head; mp != NULL; mp = nmp) + { + if (mp->refcount > 0) + { + if (dump_file) + { + fprintf (dump_file, + ";; Offset %u, min %ld, max %ld ", + (unsigned) mp->offset, (unsigned long) mp->min_address, + (unsigned long) mp->max_address); + arm_print_value (dump_file, mp->value); + fputc ('\n', dump_file); + } + + switch (mp->fix_size) + { +#ifdef HAVE_consttable_1 + case 1: + scan = emit_insn_after (gen_consttable_1 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_2 + case 2: + scan = emit_insn_after (gen_consttable_2 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_4 + case 4: + scan = emit_insn_after (gen_consttable_4 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_8 + case 8: + scan = emit_insn_after (gen_consttable_8 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_16 + case 16: + scan = emit_insn_after (gen_consttable_16 (mp->value), scan); + break; + +#endif + default: + gcc_unreachable (); + } + } + + nmp = mp->next; + free (mp); + } + + minipool_vector_head = minipool_vector_tail = NULL; + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); +} + +/* Return the cost of forcibly inserting a barrier after INSN. */ +static int +arm_barrier_cost (rtx insn) +{ + /* Basing the location of the pool on the loop depth is preferable, + but at the moment, the basic block information seems to be + corrupt by this stage of the compilation. */ + int base_cost = 50; + rtx next = next_nonnote_insn (insn); + + if (next != NULL && LABEL_P (next)) + base_cost -= 20; + + switch (GET_CODE (insn)) + { + case CODE_LABEL: + /* It will always be better to place the table before the label, rather + than after it. */ + return 50; + + case INSN: + case CALL_INSN: + return base_cost; + + case JUMP_INSN: + return base_cost - 10; + + default: + return base_cost + 10; + } +} + +/* Find the best place in the insn stream in the range + (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier. + Create the barrier by inserting a jump and add a new fix entry for + it. 
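Offset assignment works the same way in both places it appears: walk the pool in order, give every entry the running offset, and only let entries that are still referenced consume space. A standalone sketch:

    #include <stdio.h>

    /* A pool slot: reference count, size in bytes, assigned offset.  */
    struct pool_slot { int refcount; long fix_size; long offset; };

    /* Assign offsets in slot order; unreferenced ("unpushed") entries keep a
       placeholder offset but take no space, as in the code above.  */
    static long
    assign_pool_offsets (struct pool_slot *slot, int n)
    {
      long offset = 0;
      for (int i = 0; i < n; i++)
        {
          slot[i].offset = offset;
          if (slot[i].refcount > 0)
            offset += slot[i].fix_size;
        }
      return offset;                     /* total pool size in bytes */
    }

    int
    main (void)
    {
      struct pool_slot pool[3] = { { 1, 4, 0 }, { 0, 8, 0 }, { 2, 4, 0 } };
      long total = assign_pool_offsets (pool, 3);
      printf ("offsets %ld %ld %ld, total %ld\n",
              pool[0].offset, pool[1].offset, pool[2].offset, total);
      return total == 8 ? 0 : 1;
    }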
*/ +static Mfix * +create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) +{ + HOST_WIDE_INT count = 0; + rtx barrier; + rtx from = fix->insn; + /* The instruction after which we will insert the jump. */ + rtx selected = NULL; + int selected_cost; + /* The address at which the jump instruction will be placed. */ + HOST_WIDE_INT selected_address; + Mfix * new_fix; + HOST_WIDE_INT max_count = max_address - fix->address; + rtx label = gen_label_rtx (); + + selected_cost = arm_barrier_cost (from); + selected_address = fix->address; + + while (from && count < max_count) + { + rtx tmp; + int new_cost; + + /* This code shouldn't have been called if there was a natural barrier + within range. */ + gcc_assert (!BARRIER_P (from)); + + /* Count the length of this insn. This must stay in sync with the + code that pushes minipool fixes. */ + if (LABEL_P (from)) + count += get_label_padding (from); + else + count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + if (tablejump_p (from, NULL, &tmp)) + { + count += get_jump_table_size (tmp); + + /* Jump tables aren't in a basic block, so base the cost on + the dispatch insn. If we select this location, we will + still put the pool after the table. */ + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = tmp; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + /* Continue after the dispatch table. */ + from = NEXT_INSN (tmp); + continue; + } + + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = from; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + from = NEXT_INSN (from); + } + + /* Make sure that we found a place to insert the jump. */ + gcc_assert (selected); + + /* Make sure we do not split a call and its corresponding + CALL_ARG_LOCATION note. */ + if (CALL_P (selected)) + { + rtx next = NEXT_INSN (selected); + if (next && NOTE_P (next) + && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) + selected = next; + } + + /* Create a new JUMP_INSN that branches around a barrier. */ + from = emit_jump_insn_after (gen_jump (label), selected); + JUMP_LABEL (from) = label; + barrier = emit_barrier_after (from); + emit_label_after (label, barrier); + + /* Create a minipool barrier entry for the new barrier. */ + new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix)); + new_fix->insn = barrier; + new_fix->address = selected_address; + new_fix->next = fix->next; + fix->next = new_fix; + + return new_fix; +} + +/* Record that there is a natural barrier in the insn stream at + ADDRESS. */ +static void +push_minipool_barrier (rtx insn, HOST_WIDE_INT address) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + + fix->next = NULL; + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Record INSN, which will need fixing up to load a value from the + minipool. ADDRESS is the offset of the insn since the start of the + function; LOC is a pointer to the part of the insn which requires + fixing; VALUE is the constant that must be loaded, which is of type + MODE. 
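The barrier-placement scan is a bounded greedy search: walk forward from the fix, accumulate instruction lengths, and remember the cheapest position that is still within range, with later positions winning ties. A standalone sketch over arrays of lengths and costs; the initial-cost seeding and jump-table handling of the real loop are omitted:

    #include <stdio.h>

    /* Walk forward over insns with byte LENGTH[i] and heuristic COST[i],
       never past MAX_COUNT bytes, and remember the cheapest position seen.
       Later insns win ties, as in the loop above.  Returns the index of the
       insn to insert the jump after, or -1 if none qualified.  */
    static int
    pick_barrier_position (const long *length, const int *cost, int n_insns,
                           long max_count)
    {
      long count = 0;
      int selected = -1;
      int selected_cost = 0;

      for (int i = 0; i < n_insns && count < max_count; i++)
        {
          count += length[i];
          if (count < max_count
              && (selected < 0 || cost[i] <= selected_cost))
            {
              selected = i;
              selected_cost = cost[i];
            }
        }
      return selected;
    }

    int
    main (void)
    {
      long length[] = { 4, 4, 4, 4 };
      int cost[] = { 50, 40, 40, 30 };      /* the last insn is out of range */
      int pos = pick_barrier_position (length, cost, 4, 14);
      printf ("insert after insn %d\n", pos);
      return pos == 2 ? 0 : 1;
    }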
*/ +static void +push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, + enum machine_mode mode, rtx value) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + fix->loc = loc; + fix->mode = mode; + fix->fix_size = MINIPOOL_FIX_SIZE (mode); + fix->value = value; + fix->forwards = get_attr_pool_range (insn); + fix->backwards = get_attr_neg_pool_range (insn); + fix->minipool = NULL; + + /* If an insn doesn't have a range defined for it, then it isn't + expecting to be reworked by this code. Better to stop now than + to generate duff assembly code. */ + gcc_assert (fix->forwards || fix->backwards); + + /* If an entry requires 8-byte alignment then assume all constant pools + require 4 bytes of padding. Trying to do this later on a per-pool + basis is awkward because existing pool entries have to be modified. */ + if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8) + minipool_pad = 4; + + if (dump_file) + { + fprintf (dump_file, + ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ", + GET_MODE_NAME (mode), + INSN_UID (insn), (unsigned long) address, + -1 * (long)fix->backwards, (long)fix->forwards); + arm_print_value (dump_file, fix->value); + fprintf (dump_file, "\n"); + } + + /* Add it to the chain of fixes. */ + fix->next = NULL; + + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we always want to synthesize + the value. */ +int +arm_max_const_double_inline_cost () +{ + /* Let the value get synthesized to avoid the use of literal pools. */ + if (arm_disable_literal_pool) + return 99; + + return ((optimize_size || arm_ld_sched) ? 3 : 4); +} + +/* Return the cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we don't know how to + do it. */ +int +arm_const_double_inline_cost (rtx val) +{ + rtx lowpart, highpart; + enum machine_mode mode; + + mode = GET_MODE (val); + + if (mode == VOIDmode) + mode = DImode; + + gcc_assert (GET_MODE_SIZE (mode) == 8); + + lowpart = gen_lowpart (SImode, val); + highpart = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (lowpart)); + gcc_assert (CONST_INT_P (highpart)); + + return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart), + NULL_RTX, NULL_RTX, 0, 0) + + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart), + NULL_RTX, NULL_RTX, 0, 0)); +} + +/* Return true if it is worthwhile to split a 64-bit constant into two + 32-bit operations. This is the case if optimizing for size, or + if we have load delay slots, or if one 32-bit part can be done with + a single data operation. */ +bool +arm_const_double_by_parts (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (optimize_size || arm_ld_sched) + return true; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (part)); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + part = gen_lowpart (SImode, val); + + gcc_assert (CONST_INT_P (part)); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + return false; +} + +/* Return true if it is possible to inline both the high and low parts + of a 64-bit constant into 32-bit data processing instructions. 
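The splitting logic above looks at the two 32-bit halves of the constant and asks whether each half, or its complement, is a valid ARM data-processing immediate, i.e. an 8-bit value rotated right by an even amount. A standalone sketch of that test and of a two-part cost estimate; the real arm_gen_constant cost model is considerably richer:

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-in for const_ok_for_arm: V is encodable if some even
       left-rotation of it (undoing the architectural right-rotation) fits in
       eight bits.  */
    static bool
    arm_immediate_p (uint32_t v)
    {
      for (int rot = 0; rot < 32; rot += 2)
        {
          uint32_t r = (v << rot) | (rot ? v >> (32 - rot) : 0);
          if (r <= 0xffu)
            return true;
        }
      return false;
    }

    /* Split a 64-bit constant into halves as above and charge one insn for a
       half that is usable directly or complemented, two otherwise.  */
    static int
    split_cost_estimate (uint64_t val)
    {
      uint32_t lo = (uint32_t) val;
      uint32_t hi = (uint32_t) (val >> 32);
      int cost = 0;

      cost += (arm_immediate_p (lo) || arm_immediate_p (~lo)) ? 1 : 2;
      cost += (arm_immediate_p (hi) || arm_immediate_p (~hi)) ? 1 : 2;
      return cost;
    }

    int
    main (void)
    {
      /* 0x000000FF00000001: both halves are single immediates.  */
      return split_cost_estimate (0x000000FF00000001ull) == 2 ? 0 : 1;
    }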
*/ +bool +arm_const_double_by_immediates (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (part)); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + part = gen_lowpart (SImode, val); + + gcc_assert (CONST_INT_P (part)); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + return true; +} + +/* Scan INSN and note any of its operands that need fixing. + If DO_PUSHES is false we do not actually push any of the fixups + needed. */ +static void +note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) +{ + int opno; + + extract_insn (insn); + + if (!constrain_operands (1)) + fatal_insn_not_found (insn); + + if (recog_data.n_alternatives == 0) + return; + + /* Fill in recog_op_alt with information about the constraints of + this insn. */ + preprocess_constraints (); + + for (opno = 0; opno < recog_data.n_operands; opno++) + { + /* Things we need to fix can only occur in inputs. */ + if (recog_data.operand_type[opno] != OP_IN) + continue; + + /* If this alternative is a memory reference, then any mention + of constants in this alternative is really to fool reload + into allowing us to accept one there. We need to fix them up + now so that we output the right code. */ + if (recog_op_alt[opno][which_alternative].memory_ok) + { + rtx op = recog_data.operand[opno]; + + if (CONSTANT_P (op)) + { + if (do_pushes) + push_minipool_fix (insn, address, recog_data.operand_loc[opno], + recog_data.operand_mode[opno], op); + } + else if (MEM_P (op) + && GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) + { + if (do_pushes) + { + rtx cop = avoid_constant_pool_reference (op); + + /* Casting the address of something to a mode narrower + than a word can cause avoid_constant_pool_reference() + to return the pool reference itself. That's no good to + us here. Lets just hope that we can use the + constant pool value directly. */ + if (op == cop) + cop = get_pool_constant (XEXP (op, 0)); + + push_minipool_fix (insn, address, + recog_data.operand_loc[opno], + recog_data.operand_mode[opno], cop); + } + + } + } + } + + return; +} + +/* Rewrite move insn into subtract of 0 if the condition codes will + be useful in next conditional jump insn. */ + +static void +thumb1_reorg (void) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx dest, src; + rtx pat, op0, set = NULL; + rtx prev, insn = BB_END (bb); + bool insn_clobbered = false; + + while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + insn = PREV_INSN (insn); + + /* Find the last cbranchsi4_insn in basic block BB. */ + if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + continue; + + /* Get the register with which we are comparing. */ + pat = PATTERN (insn); + op0 = XEXP (XEXP (SET_SRC (pat), 0), 0); + + /* Find the first flag setting insn before INSN in basic block BB. */ + gcc_assert (insn != BB_HEAD (bb)); + for (prev = PREV_INSN (insn); + (!insn_clobbered + && prev != BB_HEAD (bb) + && (NOTE_P (prev) + || DEBUG_INSN_P (prev) + || ((set = single_set (prev)) != NULL + && get_attr_conds (prev) == CONDS_NOCOND))); + prev = PREV_INSN (prev)) + { + if (reg_set_p (op0, prev)) + insn_clobbered = true; + } + + /* Skip if op0 is clobbered by insn other than prev. 
*/ + if (insn_clobbered) + continue; + + if (!set) + continue; + + dest = SET_DEST (set); + src = SET_SRC (set); + if (!low_register_operand (dest, SImode) + || !low_register_operand (src, SImode)) + continue; + + /* Rewrite move into subtract of 0 if its operand is compared with ZERO + in INSN. Both src and dest of the move insn are checked. */ + if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest)) + { + dest = copy_rtx (dest); + src = copy_rtx (src); + src = gen_rtx_MINUS (SImode, src, const0_rtx); + PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src); + INSN_CODE (prev) = -1; + /* Set test register in INSN to dest. */ + XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest); + INSN_CODE (insn) = -1; + } + } +} + +/* Convert instructions to their cc-clobbering variant if possible, since + that allows us to use smaller encodings. */ + +static void +thumb2_reorg (void) +{ + basic_block bb; + regset_head live; + + INIT_REG_SET (&live); + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + df_analyze (); + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn; + + COPY_REG_SET (&live, DF_LR_OUT (bb)); + df_simulate_initialize_backwards (bb, &live); + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (NONJUMP_INSN_P (insn) + && !REGNO_REG_SET_P (&live, CC_REGNUM) + && GET_CODE (PATTERN (insn)) == SET) + { + enum {SKIP, CONV, SWAP_CONV} action = SKIP; + rtx pat = PATTERN (insn); + rtx dst = XEXP (pat, 0); + rtx src = XEXP (pat, 1); + rtx op0 = NULL_RTX, op1 = NULL_RTX; + + if (!OBJECT_P (src)) + op0 = XEXP (src, 0); + + if (BINARY_P (src)) + op1 = XEXP (src, 1); + + if (low_register_operand (dst, SImode)) + { + switch (GET_CODE (src)) + { + case PLUS: + /* Adding two registers and storing the result + in the first source is already a 16-bit + operation. */ + if (rtx_equal_p (dst, op0) + && register_operand (op1, SImode)) + break; + + if (low_register_operand (op0, SImode)) + { + /* ADDS ,, */ + if (low_register_operand (op1, SImode)) + action = CONV; + /* ADDS ,# */ + /* SUBS ,# */ + else if (rtx_equal_p (dst, op0) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -255, 255)) + action = CONV; + /* ADDS ,,# */ + /* SUBS ,,# */ + else if (CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -7, 7)) + action = CONV; + } + /* ADCS , */ + else if (GET_CODE (XEXP (src, 0)) == PLUS + && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst) + && low_register_operand (XEXP (XEXP (src, 0), 1), + SImode) + && COMPARISON_P (op1) + && cc_register (XEXP (op1, 0), VOIDmode) + && maybe_get_arm_condition_code (op1) == ARM_CS + && XEXP (op1, 1) == const0_rtx) + action = CONV; + break; + + case MINUS: + /* RSBS ,,#0 + Not handled here: see NEG below. */ + /* SUBS ,,# + SUBS ,# + Not handled here: see PLUS above. */ + /* SUBS ,, */ + if (low_register_operand (op0, SImode) + && low_register_operand (op1, SImode)) + action = CONV; + break; + + case MULT: + /* MULS ,, + As an exception to the rule, this is only used + when optimizing for size since MULS is slow on all + known implementations. We do not even want to use + MULS in cold code, if optimizing for speed, so we + test the global flag here. */ + if (!optimize_size) + break; + /* else fall through. 
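The immediate ranges in the cases above are what decide whether a 16-bit flag-setting encoding exists: ADDS/SUBS with the destination equal to the first source accept immediates in [-255, 255] (negative values become SUBS), the three-operand form only [-7, 7], and MOVS only 0..255. A standalone sketch of those checks, with hypothetical helper names:

    #include <stdbool.h>

    /* Can "dst = src_reg + imm" be narrowed to a flag-setting 16-bit
       ADDS/SUBS?  DST_IS_SRC says whether the destination equals the first
       source; LOW_REGS whether all registers involved are r0-r7.  */
    static bool
    narrow_adds_ok (bool low_regs, bool dst_is_src, long imm)
    {
      if (!low_regs)
        return false;
      if (dst_is_src && imm >= -255 && imm <= 255)
        return true;                    /* ADDS/SUBS Rdn, #imm8 */
      if (imm >= -7 && imm <= 7)
        return true;                    /* ADDS/SUBS Rd, Rn, #imm3 */
      return false;
    }

    /* Likewise for "dst = #imm": MOVS Rd, #imm8 only takes 0..255.  */
    static bool
    narrow_movs_ok (bool low_reg_dst, long imm)
    {
      return low_reg_dst && imm >= 0 && imm <= 255;
    }

    int
    main (void)
    {
      return (narrow_adds_ok (true, false, 7)
              && !narrow_adds_ok (true, false, 100)
              && narrow_movs_ok (true, 200)) ? 0 : 1;
    }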
*/ + case AND: + case IOR: + case XOR: + /* ANDS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + else if (rtx_equal_p (dst, op1) + && low_register_operand (op0, SImode)) + action = SWAP_CONV; + break; + + case ASHIFTRT: + case ASHIFT: + case LSHIFTRT: + /* ASRS , */ + /* LSRS , */ + /* LSLS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + /* ASRS ,,# */ + /* LSRS ,,# */ + /* LSLS ,,# */ + else if (low_register_operand (op0, SImode) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), 0, 31)) + action = CONV; + break; + + case ROTATERT: + /* RORS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + break; + + case NOT: + case NEG: + /* MVNS , */ + /* NEGS , (a.k.a RSBS) */ + if (low_register_operand (op0, SImode)) + action = CONV; + break; + + case CONST_INT: + /* MOVS ,# */ + if (CONST_INT_P (src) + && IN_RANGE (INTVAL (src), 0, 255)) + action = CONV; + break; + + case REG: + /* MOVS and MOV with registers have different + encodings, so are not relevant here. */ + break; + + default: + break; + } + } + + if (action != SKIP) + { + rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); + rtvec vec; + + if (action == SWAP_CONV) + { + src = copy_rtx (src); + XEXP (src, 0) = op1; + XEXP (src, 1) = op0; + pat = gen_rtx_SET (VOIDmode, dst, src); + vec = gen_rtvec (2, pat, clobber); + } + else /* action == CONV */ + vec = gen_rtvec (2, pat, clobber); + + PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); + INSN_CODE (insn) = -1; + } + } + + if (NONDEBUG_INSN_P (insn)) + df_simulate_one_insn_backwards (bb, insn, &live); + } + } + + CLEAR_REG_SET (&live); +} + +/* Gcc puts the pool in the wrong place for ARM, since we can only + load addresses a limited distance around the pc. We do some + special munging to move the constant pool values to the correct + point in the code. */ +static void +arm_reorg (void) +{ + rtx insn; + HOST_WIDE_INT address = 0; + Mfix * fix; + + if (TARGET_THUMB1) + thumb1_reorg (); + else if (TARGET_THUMB2) + thumb2_reorg (); + + /* Ensure all insns that must be split have been split at this point. + Otherwise, the pool placement code below may compute incorrect + insn lengths. Note that when optimizing, all insns have already + been split at this point. */ + if (!optimize) + split_all_insns_noflow (); + + minipool_fix_head = minipool_fix_tail = NULL; + + /* The first insn must always be a note, or the code below won't + scan it properly. */ + insn = get_insns (); + gcc_assert (NOTE_P (insn)); + minipool_pad = 0; + + /* Scan all the insns and record the operands that will need fixing. */ + for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn)) + { + if (BARRIER_P (insn)) + push_minipool_barrier (insn, address); + else if (INSN_P (insn)) + { + rtx table; + + note_invalid_constants (insn, address, true); + address += get_attr_length (insn); + + /* If the insn is a vector jump, add the size of the table + and skip the table. */ + if (tablejump_p (insn, NULL, &table)) + { + address += get_jump_table_size (table); + insn = table; + } + } + else if (LABEL_P (insn)) + /* Add the worst-case padding due to alignment. We don't add + the _current_ padding because the minipool insertions + themselves might change it. */ + address += get_label_padding (insn); + } + + fix = minipool_fix_head; + + /* Now scan the fixups and perform the required changes. 
*/ + while (fix) + { + Mfix * ftmp; + Mfix * fdel; + Mfix * last_added_fix; + Mfix * last_barrier = NULL; + Mfix * this_fix; + + /* Skip any further barriers before the next fix. */ + while (fix && BARRIER_P (fix->insn)) + fix = fix->next; + + /* No more fixes. */ + if (fix == NULL) + break; + + last_added_fix = NULL; + + for (ftmp = fix; ftmp; ftmp = ftmp->next) + { + if (BARRIER_P (ftmp->insn)) + { + if (ftmp->address >= minipool_vector_head->max_address) + break; + + last_barrier = ftmp; + } + else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL) + break; + + last_added_fix = ftmp; /* Keep track of the last fix added. */ + } + + /* If we found a barrier, drop back to that; any fixes that we + could have reached but come after the barrier will now go in + the next mini-pool. */ + if (last_barrier != NULL) + { + /* Reduce the refcount for those fixes that won't go into this + pool after all. */ + for (fdel = last_barrier->next; + fdel && fdel != ftmp; + fdel = fdel->next) + { + fdel->minipool->refcount--; + fdel->minipool = NULL; + } + + ftmp = last_barrier; + } + else + { + /* ftmp is first fix that we can't fit into this pool and + there no natural barriers that we could use. Insert a + new barrier in the code somewhere between the previous + fix and this one, and arrange to jump around it. */ + HOST_WIDE_INT max_address; + + /* The last item on the list of fixes must be a barrier, so + we can never run off the end of the list of fixes without + last_barrier being set. */ + gcc_assert (ftmp); + + max_address = minipool_vector_head->max_address; + /* Check that there isn't another fix that is in range that + we couldn't fit into this pool because the pool was + already too large: we need to put the pool before such an + instruction. The pool itself may come just after the + fix because create_fix_barrier also allows space for a + jump instruction. */ + if (ftmp->address < max_address) + max_address = ftmp->address + 1; + + last_barrier = create_fix_barrier (last_added_fix, max_address); + } + + assign_minipool_offsets (last_barrier); + + while (ftmp) + { + if (!BARRIER_P (ftmp->insn) + && ((ftmp->minipool = add_minipool_backward_ref (ftmp)) + == NULL)) + break; + + ftmp = ftmp->next; + } + + /* Scan over the fixes we have identified for this pool, fixing them + up and adding the constants to the pool itself. */ + for (this_fix = fix; this_fix && ftmp != this_fix; + this_fix = this_fix->next) + if (!BARRIER_P (this_fix->insn)) + { + rtx addr + = plus_constant (Pmode, + gen_rtx_LABEL_REF (VOIDmode, + minipool_vector_label), + this_fix->minipool->offset); + *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr); + } + + dump_minipool (last_barrier->insn); + fix = ftmp; + } + + /* From now on we must synthesize any constants that we can't handle + directly. This can happen if the RTL gets split during final + instruction generation. */ + after_arm_reorg = 1; + + /* Free the minipool memory. */ + obstack_free (&minipool_obstack, minipool_startobj); +} + +/* Routines to output assembly language. */ + +/* If the rtx is the correct value then return the string of the number. + In this way we can ensure that valid double constants are generated even + when cross compiling. */ +const char * +fp_immediate_constant (rtx x) +{ + REAL_VALUE_TYPE r; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + gcc_assert (REAL_VALUES_EQUAL (r, value_fp0)); + return "0"; +} + +/* As for fp_immediate_constant, but value is passed directly, not in rtx. 
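At its core the fixup loop is a greedy grouping pass: keep accumulating fixes into the current pool until the next fix could no longer reach it, then dump the pool (after a barrier, creating one if needed) and start over. A deliberately simplified standalone sketch of just the grouping decision; the real loop also tracks barriers, backward references and refcounts:

    #include <stdio.h>

    /* A fix: its own address and the highest address its pool may sit at.  */
    struct fix { long address; long max_address; };

    /* FIXES are in increasing address order.  A pool is emitted as soon as
       the next fix starts beyond the group's tightest max_address.  */
    static int
    count_pools (const struct fix *fixes, int n)
    {
      int pools = 0;
      long group_limit = 0;
      int have_group = 0;

      for (int i = 0; i < n; i++)
        {
          if (have_group && fixes[i].address >= group_limit)
            {
              pools++;                  /* dump the current pool first */
              have_group = 0;
            }
          if (!have_group || fixes[i].max_address < group_limit)
            group_limit = fixes[i].max_address;
          have_group = 1;
        }
      return pools + (have_group ? 1 : 0);
    }

    int
    main (void)
    {
      struct fix fixes[] = { { 0, 4000 }, { 100, 4100 }, { 4500, 8500 } };
      printf ("%d pools\n", count_pools (fixes, 3));
      return count_pools (fixes, 3) == 2 ? 0 : 1;
    }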
*/ +static const char * +fp_const_from_val (REAL_VALUE_TYPE *r) +{ + if (!fp_consts_inited) + init_fp_table (); + + gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0)); + return "0"; +} + +/* OPERANDS[0] is the entire list of insns that constitute pop, + OPERANDS[1] is the base register, RETURN_PC is true iff return insn + is in the list, UPDATE is true iff the list contains explicit + update of base register. */ +void +arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, + bool update) +{ + int i; + char pattern[100]; + int offset; + const char *conditional; + int num_saves = XVECLEN (operands[0], 0); + unsigned int regno; + unsigned int regno_base = REGNO (operands[1]); + + offset = 0; + offset += update ? 1 : 0; + offset += return_pc ? 1 : 0; + + /* Is the base register in the list? */ + for (i = offset; i < num_saves; i++) + { + regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)); + /* If SP is in the list, then the base register must be SP. */ + gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM)); + /* If base register is in the list, there must be no explicit update. */ + if (regno == regno_base) + gcc_assert (!update); + } + + conditional = reverse ? "%?%D0" : "%?%d0"; + if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM) + { + /* Output pop (not stmfd) because it has a shorter encoding. */ + gcc_assert (update); + sprintf (pattern, "pop%s\t{", conditional); + } + else + { + /* Output ldmfd when the base register is SP, otherwise output ldmia. + It's just a convention, their semantics are identical. */ + if (regno_base == SP_REGNUM) + sprintf (pattern, "ldm%sfd\t", conditional); + else if (TARGET_UNIFIED_ASM) + sprintf (pattern, "ldmia%s\t", conditional); + else + sprintf (pattern, "ldm%sia\t", conditional); + + strcat (pattern, reg_names[regno_base]); + if (update) + strcat (pattern, "!, {"); + else + strcat (pattern, ", {"); + } + + /* Output the first destination register. */ + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]); + + /* Output the rest of the destination registers. */ + for (i = offset + 1; i < num_saves; i++) + { + strcat (pattern, ", "); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]); + } + + strcat (pattern, "}"); + + if (IS_INTERRUPT (arm_current_func_type ()) && return_pc) + strcat (pattern, "^"); + + output_asm_insn (pattern, &cond); +} + + +/* Output the assembly for a store multiple. */ + +const char * +vfp_output_fstmd (rtx * operands) +{ + char pattern[100]; + int p; + int base; + int i; + + strcpy (pattern, "fstmfdd%?\t%m0!, {%P1"); + p = strlen (pattern); + + gcc_assert (REG_P (operands[1])); + + base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2; + for (i = 1; i < XVECLEN (operands[2], 0); i++) + { + p += sprintf (&pattern[p], ", d%d", base + i); + } + strcpy (&pattern[p], "}"); + + output_asm_insn (pattern, operands); + return ""; +} + + +/* Emit RTL to save block of VFP register pairs to the stack. Returns the + number of bytes pushed. */ + +static int +vfp_emit_fstmd (int base_reg, int count) +{ + rtx par; + rtx dwarf; + rtx tmp, reg; + int i; + + /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two + register pairs are stored by a store multiple insn. We avoid this + by pushing an extra pair. */ + if (count == 2 && !arm_arch6) + { + if (base_reg == LAST_VFP_REGNUM - 3) + base_reg -= 2; + count++; + } + + /* FSTMD may not store more than 16 doubleword registers at once. 
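The assembly writers above all build their output the same way, with sprintf/strcat into a fixed buffer. A standalone sketch that builds a pop register list from a bit mask in the same style:

    #include <stdio.h>
    #include <string.h>

    /* Build a "pop {...}" register list from a bit mask.  Bit N stands for
       rN; bit 15 is printed as "pc".  BUF must be large enough for the
       longest possible list.  */
    static const char *
    build_pop_pattern (char *buf, unsigned mask)
    {
      int first = 1;

      strcpy (buf, "pop\t{");
      for (int regno = 0; regno <= 15; regno++)
        if (mask & (1u << regno))
          {
            if (!first)
              strcat (buf, ", ");
            if (regno == 15)
              strcat (buf, "pc");
            else
              sprintf (buf + strlen (buf), "r%d", regno);
            first = 0;
          }
      strcat (buf, "}");
      return buf;
    }

    int
    main (void)
    {
      char buf[128];
      puts (build_pop_pattern (buf, (1u << 4) | (1u << 5) | (1u << 15)));
      return 0;
    }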
Split + larger stores into multiple parts (up to a maximum of two, in + practice). */ + if (count > 16) + { + int saved; + /* NOTE: base_reg is an internal register number, so each D register + counts as 2. */ + saved = vfp_emit_fstmd (base_reg + 32, count - 16); + saved += vfp_emit_fstmd (base_reg, 16); + return saved; + } + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); + + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (Pmode, stack_pointer_rtx, + - (count * 8))) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -(count * 8))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 1) = tmp; + + for (i = 1; i < count; i++) + { + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, + plus_constant (Pmode, + stack_pointer_rtx, + i * 8)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp; + } + + par = emit_insn (par); + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (par) = 1; + + return count * 8; +} + +/* Emit a call instruction with pattern PAT. ADDR is the address of + the call target. */ + +void +arm_emit_call_insn (rtx pat, rtx addr) +{ + rtx insn; + + insn = emit_call_insn (pat); + + /* The PIC register is live on entry to VxWorks PIC PLT entries. + If the call might use such an entry, add a use of the PIC register + to the instruction's CALL_INSN_FUNCTION_USAGE. */ + if (TARGET_VXWORKS_RTP + && flag_pic + && GET_CODE (addr) == SYMBOL_REF + && (SYMBOL_REF_DECL (addr) + ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) + : !SYMBOL_REF_LOCAL_P (addr))) + { + require_pic_register (); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); + } +} + +/* Output a 'call' insn. */ +const char * +output_call (rtx *operands) +{ + gcc_assert (!arm_arch5); /* Patterns should call blx directly. */ + + /* Handle calls to lr using ip (which may be clobbered in subr anyway). */ + if (REGNO (operands[0]) == LR_REGNUM) + { + operands[0] = gen_rtx_REG (SImode, IP_REGNUM); + output_asm_insn ("mov%?\t%0, %|lr", operands); + } + + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + + if (TARGET_INTERWORK || arm_arch4t) + output_asm_insn ("bx%?\t%0", operands); + else + output_asm_insn ("mov%?\t%|pc, %0", operands); + + return ""; +} + +/* Output a 'call' insn that is a reference in memory. This is + disabled for ARMv5 and we prefer a blx instead because otherwise + there's a significant performance overhead. */ +const char * +output_call_mem (rtx *operands) +{ + gcc_assert (!arm_arch5); + if (TARGET_INTERWORK) + { + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("bx%?\t%|ip", operands); + } + else if (regno_use_in (LR_REGNUM, operands[0])) + { + /* LR is used in the memory address. We load the address in the + first instruction. It's safe to use IP as the target of the + load since the call will kill it anyway. 
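The register-count adjustments in vfp_emit_fstmd are easy to state on plain integers: a push of exactly two D registers grows to three on pre-ARMv6 cores (the ARM10 VFPr1 erratum workaround), pushes of more than 16 D registers are split, and each D register pushed accounts for 8 bytes. A standalone sketch; the base-register bookkeeping is omitted:

    #include <stdio.h>

    /* COUNT is the number of D registers to push.  Returns bytes pushed.  */
    static int
    fstmd_bytes (int count, int have_arm_arch6)
    {
      if (count == 2 && !have_arm_arch6)
        count++;                        /* ARM10 VFPr1: push an extra pair */
      if (count > 16)                   /* FSTMD handles at most 16 D regs */
        return fstmd_bytes (count - 16, have_arm_arch6)
               + fstmd_bytes (16, have_arm_arch6);
      return count * 8;                 /* 8 bytes per D register */
    }

    int
    main (void)
    {
      printf ("%d %d\n", fstmd_bytes (2, 0), fstmd_bytes (20, 1));
      return (fstmd_bytes (2, 0) == 24 && fstmd_bytes (20, 1) == 160) ? 0 : 1;
    }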
*/ + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + if (arm_arch4t) + output_asm_insn ("bx%?\t%|ip", operands); + else + output_asm_insn ("mov%?\t%|pc, %|ip", operands); + } + else + { + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("ldr%?\t%|pc, %0", operands); + } + + return ""; +} + + +/* Output a move from arm registers to arm registers of a long double + OPERANDS[0] is the destination. + OPERANDS[1] is the source. */ +const char * +output_mov_long_double_arm_from_arm (rtx *operands) +{ + /* We have to be careful here because the two might overlap. */ + int dest_start = REGNO (operands[0]); + int src_start = REGNO (operands[1]); + rtx ops[2]; + int i; + + if (dest_start < src_start) + { + for (i = 0; i < 3; i++) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + else + { + for (i = 2; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + + return ""; +} + +void +arm_emit_movpair (rtx dest, rtx src) + { + /* If the src is an immediate, simplify it. */ + if (CONST_INT_P (src)) + { + HOST_WIDE_INT val = INTVAL (src); + emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); + if ((val >> 16) & 0x0000ffff) + emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), + GEN_INT (16)), + GEN_INT ((val >> 16) & 0x0000ffff)); + return; + } + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); + } + +/* Output a move between double words. It must be REG<-MEM + or MEM<-REG. */ +const char * +output_move_double (rtx *operands, bool emit, int *count) +{ + enum rtx_code code0 = GET_CODE (operands[0]); + enum rtx_code code1 = GET_CODE (operands[1]); + rtx otherops[3]; + if (count) + *count = 1; + + /* The only case when this might happen is when + you are looking at the length of a DImode instruction + that has an invalid constant in it. */ + if (code0 == REG && code1 != MEM) + { + gcc_assert (!emit); + *count = 2; + return ""; + } + + if (code0 == REG) + { + unsigned int reg0 = REGNO (operands[0]); + + otherops[0] = gen_rtx_REG (SImode, 1 + reg0); + + gcc_assert (code1 == MEM); /* Constraints should ensure this. */ + + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + + if (emit) + { + if (TARGET_LDRD + && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) + output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + } + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands); + break; + + case PRE_DEC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands); + else + output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands); + } + break; + + case POST_INC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands); + } + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + /* Autoicrement addressing modes should never have overlapping + base and destination registers, and overlapping index registers + are already prohibited, so this doesn't need to worry about + fix_cm3_ldrd. 
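arm_emit_movpair splits a 32-bit constant into a low half that is set first and a high half that is only written when it is nonzero. A standalone sketch of that split on a plain integer standing in for the register, movw/movt style:

    #include <stdint.h>
    #include <stdio.h>

    /* Materialise the low 16 bits with the top cleared, then, only if the
       high 16 bits are nonzero, deposit them into bits 16..31 while leaving
       the low half alone.  Returns how many instructions that takes.  */
    static int
    emit_movpair_const (uint32_t val, uint32_t *reg)
    {
      uint32_t hi = (val >> 16) & 0xffff;

      *reg = val & 0xffff;                 /* movw-style: low half only */
      if (hi == 0)
        return 1;
      *reg = (*reg & 0xffff) | (hi << 16); /* movt-style: patch the top half */
      return 2;
    }

    int
    main (void)
    {
      uint32_t r;
      int n = emit_movpair_const (0xdead0003u, &r);
      printf ("%d insns, r = 0x%08x\n", n, r);
      return (n == 2 && r == 0xdead0003u) ? 0 : 1;
    }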
*/ + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY) + { + if (reg_overlap_mentioned_p (otherops[0], otherops[2])) + { + /* Registers overlap so split out the increment. */ + if (emit) + { + output_asm_insn ("add%?\t%1, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops); + } + if (count) + *count = 2; + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can + handle, fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || !CONST_INT_P (otherops[2]) + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + { + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops); + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + } + if (count) + *count = 2; + + } + } + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || !CONST_INT_P (otherops[2]) + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + { + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops); + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); + } + if (count) + *count = 2; + } + } + break; + + case LABEL_REF: + case CONST: + /* We might be able to use ldrd %0, %1 here. However the range is + different to ldr/adr, and it is broken on some ARMv7-M + implementations. */ + /* Use the second register of the pair to avoid problematic + overlap. */ + otherops[1] = operands[1]; + if (emit) + output_asm_insn ("adr%?\t%0, %1", otherops); + operands[1] = otherops[0]; + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%1, %M0", operands); + } + + if (count) + *count = 2; + break; + + /* ??? This needs checking for thumb2. */ + default: + if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1), + GET_MODE (XEXP (XEXP (operands[1], 0), 1)))) + { + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (operands[1], 0), 0); + otherops[2] = XEXP (XEXP (operands[1], 0), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PLUS) + { + if (CONST_INT_P (otherops[2]) && !TARGET_LDRD) + { + switch ((int) INTVAL (otherops[2])) + { + case -8: + if (emit) + output_asm_insn ("ldm%(db%)\t%1, %M0", otherops); + return ""; + case -4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("ldm%(da%)\t%1, %M0", otherops); + return ""; + case 4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops); + return ""; + } + } + otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); + operands[1] = otherops[0]; + if (TARGET_LDRD + && (REG_P (otherops[2]) + || TARGET_THUMB2 + || (CONST_INT_P (otherops[2]) + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + if (reg_overlap_mentioned_p (operands[0], + otherops[2])) + { + rtx tmp; + /* Swap base and index registers over to + avoid a conflict. */ + tmp = otherops[1]; + otherops[1] = otherops[2]; + otherops[2] = tmp; + } + /* If both registers conflict, it will usually + have been fixed by a splitter. 
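The repeated range test above is the whole decision: a doubleword access with a constant index must have the index in (-256, 256) to stay a single LDRD/STRD, Thumb-2 and register indexes are accepted as written, and anything else is split into two single-word accesses. A standalone sketch mirroring that check; it follows the code above rather than the full architectural offset rules:

    #include <stdbool.h>

    /* How many instructions does a doubleword load or store need?  */
    static int
    doubleword_access_insns (bool thumb2, bool index_is_reg, long const_index)
    {
      if (thumb2 || index_is_reg)
        return 1;
      if (const_index > -256 && const_index < 256)
        return 1;                      /* single LDRD/STRD */
      return 2;                        /* e.g. large iWMMXt offsets: two LDRs */
    }

    int
    main (void)
    {
      return (doubleword_access_insns (false, false, 248) == 1
              && doubleword_access_insns (false, false, 1024) == 2) ? 0 : 1;
    }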
*/ + if (reg_overlap_mentioned_p (operands[0], otherops[2]) + || (fix_cm3_ldrd && reg0 == REGNO (otherops[1]))) + { + if (emit) + { + output_asm_insn ("add%?\t%0, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + } + if (count) + *count = 2; + } + else + { + otherops[0] = operands[0]; + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops); + } + return ""; + } + + if (CONST_INT_P (otherops[2])) + { + if (emit) + { + if (!(const_ok_for_arm (INTVAL (otherops[2])))) + output_asm_insn ("sub%?\t%0, %1, #%n2", otherops); + else + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + } + else + { + if (emit) + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + } + else + { + if (emit) + output_asm_insn ("sub%?\t%0, %1, %2", otherops); + } + + if (count) + *count = 2; + + if (TARGET_LDRD) + return "ldr%(d%)\t%0, [%1]"; + + return "ldm%(ia%)\t%1, %M0"; + } + else + { + otherops[1] = adjust_address (operands[1], SImode, 4); + /* Take care of overlapping base/data reg. */ + if (reg_mentioned_p (operands[0], operands[1])) + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, %1", otherops); + output_asm_insn ("ldr%?\t%0, %1", operands); + } + if (count) + *count = 2; + + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, %1", otherops); + } + if (count) + *count = 2; + } + } + } + } + else + { + /* Constraints should ensure this. */ + gcc_assert (code0 == MEM && code1 == REG); + gcc_assert ((REGNO (operands[1]) != IP_REGNUM) + || (TARGET_ARM && TARGET_LDRD)); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0]", operands); + else + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + } + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands); + break; + + case PRE_DEC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands); + else + output_asm_insn ("stm%(db%)\t%m0!, %M1", operands); + } + break; + + case POST_INC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands); + else + output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands); + } + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1); + + /* IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. 
*/ + if (!TARGET_THUMB2 + && CONST_INT_P (otherops[2]) + && (INTVAL(otherops[2]) <= -256 + || INTVAL(otherops[2]) >= 256)) + { + if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + { + if (emit) + { + output_asm_insn ("str%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + } + if (count) + *count = 2; + } + else + { + if (emit) + { + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("str%?\t%0, [%1], %2", otherops); + } + if (count) + *count = 2; + } + } + else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + { + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); + } + else + { + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops); + } + break; + + case PLUS: + otherops[2] = XEXP (XEXP (operands[0], 0), 1); + if (CONST_INT_P (otherops[2]) && !TARGET_LDRD) + { + switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1))) + { + case -8: + if (emit) + output_asm_insn ("stm%(db%)\t%m0, %M1", operands); + return ""; + + case -4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("stm%(da%)\t%m0, %M1", operands); + return ""; + + case 4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("stm%(ib%)\t%m0, %M1", operands); + return ""; + } + } + if (TARGET_LDRD + && (REG_P (otherops[2]) + || TARGET_THUMB2 + || (CONST_INT_P (otherops[2]) + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (operands[0], 0), 0); + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops); + return ""; + } + /* Fall through */ + + default: + otherops[0] = adjust_address (operands[0], SImode, 4); + otherops[1] = operands[1]; + if (emit) + { + output_asm_insn ("str%?\t%1, %0", operands); + output_asm_insn ("str%?\t%H1, %0", otherops); + } + if (count) + *count = 2; + } + } + + return ""; +} + +/* Output a move, load or store for quad-word vectors in ARM registers. Only + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ + +const char * +output_move_quad (rtx *operands) +{ + if (REG_P (operands[0])) + { + /* Load, or reg->reg move. */ + + if (MEM_P (operands[1])) + { + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + break; + + case LABEL_REF: + case CONST: + output_asm_insn ("adr%?\t%0, %1", operands); + output_asm_insn ("ldm%(ia%)\t%0, %M0", operands); + break; + + default: + gcc_unreachable (); + } + } + else + { + rtx ops[2]; + int dest, src, i; + + gcc_assert (REG_P (operands[1])); + + dest = REGNO (operands[0]); + src = REGNO (operands[1]); + + /* This seems pretty dumb, but hopefully GCC won't try to do it + very often. */ + if (dest < src) + for (i = 0; i < 4; i++) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + else + for (i = 3; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + } + else + { + gcc_assert (MEM_P (operands[0])); + gcc_assert (REG_P (operands[1])); + gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0])); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + break; + + default: + gcc_unreachable (); + } + } + + return ""; +} + +/* Output a VFP load or store instruction. 
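As a rough illustration of how the sprintf below assembles the mnemonic (illustrative note, not part of the patch): for a DFmode load whose address is a POST_INC, the template expands to "fldmiad%?\t%0!, {%P1}"; the default case gives "fldd%?\t%P0, %1" for a DFmode load and "fsts%?\t%0, %1" for an SFmode store, with "\t%@ int" appended as an assembler comment when the value being moved has an integer mode.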
*/ + +const char * +output_move_vfp (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int load = REG_P (operands[0]); + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + gcc_assert (IS_VFP_REGNUM (REGNO (reg))); + gcc_assert (mode == SFmode + || mode == DFmode + || mode == SImode + || mode == DImode + || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + switch (GET_CODE (addr)) + { + case PRE_DEC: + templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_INC: + templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + default: + templ = "f%s%c%%?\t%%%s0, %%1%s"; + ops[0] = reg; + ops[1] = mem; + break; + } + + sprintf (buff, templ, + load ? "ld" : "st", + dp ? 'd' : 's', + dp ? "P" : "", + integer_p ? "\t%@ int" : ""); + output_asm_insn (buff, ops); + + return ""; +} + +/* Output a Neon double-word or quad-word load or store, or a load + or store for larger structure modes. + + WARNING: The ordering of elements is weird in big-endian mode, + because the EABI requires that vectors stored in memory appear + as though they were stored by a VSTM, as required by the EABI. + GCC RTL defines element ordering based on in-memory order. + This can be different from the architectural ordering of elements + within a NEON register. The intrinsics defined in arm_neon.h use the + NEON register element ordering, not the GCC RTL element ordering. + + For example, the in-memory ordering of a big-endian a quadword + vector with 16-bit elements when stored from register pair {d0,d1} + will be (lowest address first, d0[N] is NEON register element N): + + [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]] + + When necessary, quadword registers (dN, dN+1) are moved to ARM + registers from rN in the order: + + dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) + + So that STM/LDM can be used on vectors in ARM registers, and the + same memory layout will result as if VSTM/VLDM were used. + + Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where + possible, which allows use of appropriate alignment tags. + Note that the choice of "64" is independent of the actual vector + element size; this size simply ensures that the behavior is + equivalent to VSTM/VLDM in both little-endian and big-endian mode. + + Due to limitations of those instructions, use of VST1.64/VLD1.64 + is not possible if: + - the address contains PRE_DEC, or + - the mode refers to more than 4 double-word registers + + In those cases, it would be possible to replace VSTM/VLDM by a + sequence of instructions; this is not currently implemented since + this is not certain to actually improve performance. 
*/ + +const char * +output_move_neon (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int regno, nregs, load = REG_P (operands[0]); + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + regno = REGNO (reg); + nregs = HARD_REGNO_NREGS (regno, mode) / 2; + gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno) + || NEON_REGNO_OK_FOR_QUAD (regno)); + gcc_assert (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode) + || VALID_NEON_STRUCT_MODE (mode)); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + switch (GET_CODE (addr)) + { + case POST_INC: + /* We have to use vldm / vstm for too-large modes. */ + if (nregs > 4) + { + templ = "v%smia%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + } + else + { + templ = "v%s1.64\t%%h1, %%A0"; + ops[0] = mem; + } + ops[1] = reg; + break; + + case PRE_DEC: + /* We have to use vldm / vstm in this case, since there is no + pre-decrement form of the vld1 / vst1 instructions. */ + templ = "v%smdb%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_MODIFY: + /* FIXME: Not currently enabled in neon_vector_mem_operand. */ + gcc_unreachable (); + + case LABEL_REF: + case PLUS: + { + int i; + int overlap = -1; + for (i = 0; i < nregs; i++) + { + /* We're only using DImode here because it's a convenient size. */ + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); + ops[1] = adjust_address (mem, DImode, 8 * i); + if (reg_overlap_mentioned_p (ops[0], mem)) + { + gcc_assert (overlap == -1); + overlap = i; + } + else + { + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + } + if (overlap != -1) + { + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); + ops[1] = adjust_address (mem, SImode, 8 * overlap); + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + + return ""; + } + + default: + /* We have to use vldm / vstm for too-large modes. */ + if (nregs > 4) + templ = "v%smia%%?\t%%m0, %%h1"; + else + templ = "v%s1.64\t%%h1, %%A0"; + + ops[0] = mem; + ops[1] = reg; + } + + sprintf (buff, templ, load ? "ld" : "st"); + output_asm_insn (buff, ops); + + return ""; +} + +/* Compute and return the length of neon_mov, where is + one of VSTRUCT modes: EI, OI, CI or XI. */ +int +arm_attr_length_move_neon (rtx insn) +{ + rtx reg, mem, addr; + int load; + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case EImode: + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + + load = REG_P (recog_data.operand[0]); + reg = recog_data.operand[!load]; + mem = recog_data.operand[load]; + + gcc_assert (MEM_P (mem)); + + mode = GET_MODE (reg); + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) + { + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; + return insns * 4; + } + else + return 4; +} + +/* Return nonzero if the offset in the address is an immediate. 
Otherwise, + return zero. */ + +int +arm_address_offset_is_imm (rtx insn) +{ + rtx mem, addr; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0])) + return 0; + + mem = recog_data.operand[0]; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + if (REG_P (addr) + || (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1)))) + return 1; + else + return 0; +} + +/* Output an ADD r, s, #n where n may be too big for one instruction. + If adding zero to one register, output nothing. */ +const char * +output_add_immediate (rtx *operands) +{ + HOST_WIDE_INT n = INTVAL (operands[2]); + + if (n != 0 || REGNO (operands[0]) != REGNO (operands[1])) + { + if (n < 0) + output_multi_immediate (operands, + "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2, + -n); + else + output_multi_immediate (operands, + "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2, + n); + } + + return ""; +} + +/* Output a multiple immediate operation. + OPERANDS is the vector of operands referred to in the output patterns. + INSTR1 is the output pattern to use for the first constant. + INSTR2 is the output pattern to use for subsequent constants. + IMMED_OP is the index of the constant slot in OPERANDS. + N is the constant value. */ +static const char * +output_multi_immediate (rtx *operands, const char *instr1, const char *instr2, + int immed_op, HOST_WIDE_INT n) +{ +#if HOST_BITS_PER_WIDE_INT > 32 + n &= 0xffffffff; +#endif + + if (n == 0) + { + /* Quick and easy output. */ + operands[immed_op] = const0_rtx; + output_asm_insn (instr1, operands); + } + else + { + int i; + const char * instr = instr1; + + /* Note that n is never zero here (which would give no output). */ + for (i = 0; i < 32; i += 2) + { + if (n & (3 << i)) + { + operands[immed_op] = GEN_INT (n & (255 << i)); + output_asm_insn (instr, operands); + instr = instr2; + i += 6; + } + } + } + + return ""; +} + +/* Return the name of a shifter operation. */ +static const char * +arm_shift_nmem(enum rtx_code code) +{ + switch (code) + { + case ASHIFT: + return ARM_LSL_NAME; + + case ASHIFTRT: + return "asr"; + + case LSHIFTRT: + return "lsr"; + + case ROTATERT: + return "ror"; + + default: + abort(); + } +} + +/* Return the appropriate ARM instruction for the operation code. + The returned result should not be overwritten. OP is the rtx of the + operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator + was shifted. */ +const char * +arithmetic_instr (rtx op, int shift_first_arg) +{ + switch (GET_CODE (op)) + { + case PLUS: + return "add"; + + case MINUS: + return shift_first_arg ? "rsb" : "sub"; + + case IOR: + return "orr"; + + case XOR: + return "eor"; + + case AND: + return "and"; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return arm_shift_nmem(GET_CODE(op)); + + default: + gcc_unreachable (); + } +} + +/* Ensure valid constant shifts and return the appropriate shift mnemonic + for the operation code. The returned result should not be overwritten. + OP is the rtx code of the shift. + On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant + shift. 
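For illustration (not part of the patch): (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP set to 3, (rotate x 8) becomes "ror" with *AMOUNTP set to 24, and a register shift count leaves *AMOUNTP at -1. A constant arithmetic left shift of 32 or more is printed as lsr #32, which gives the same result apart from the carry flag, and a shift amount of zero makes the function return NULL so that no shift is printed at all.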
*/ +static const char * +shift_op (rtx op, HOST_WIDE_INT *amountp) +{ + const char * mnem; + enum rtx_code code = GET_CODE (op); + + switch (code) + { + case ROTATE: + if (!CONST_INT_P (XEXP (op, 1))) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + code = ROTATERT; + *amountp = 32 - INTVAL (XEXP (op, 1)); + mnem = "ror"; + break; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + mnem = arm_shift_nmem(code); + if (CONST_INT_P (XEXP (op, 1))) + { + *amountp = INTVAL (XEXP (op, 1)); + } + else if (REG_P (XEXP (op, 1))) + { + *amountp = -1; + return mnem; + } + else + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + break; + + case MULT: + /* We never have to worry about the amount being other than a + power of 2, since this case can never be reloaded from a reg. */ + if (!CONST_INT_P (XEXP (op, 1))) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF; + + /* Amount must be a power of two. */ + if (*amountp & (*amountp - 1)) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + *amountp = int_log2 (*amountp); + return ARM_LSL_NAME; + + default: + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + /* This is not 100% correct, but follows from the desire to merge + multiplication by a power of 2 with the recognizer for a + shift. >=32 is not a valid shift for "lsl", so we must try and + output a shift that produces the correct arithmetical result. + Using lsr #32 is identical except for the fact that the carry bit + is not set correctly if we set the flags; but we never use the + carry bit from such an operation, so we can ignore that. */ + if (code == ROTATERT) + /* Rotate is just modulo 32. */ + *amountp &= 31; + else if (*amountp != (*amountp & 31)) + { + if (code == ASHIFT) + mnem = "lsr"; + *amountp = 32; + } + + /* Shifts of 0 are no-ops. */ + if (*amountp == 0) + return NULL; + + return mnem; +} + +/* Obtain the shift from the POWER of two. */ + +static HOST_WIDE_INT +int_log2 (HOST_WIDE_INT power) +{ + HOST_WIDE_INT shift = 0; + + while ((((HOST_WIDE_INT) 1 << shift) & power) == 0) + { + gcc_assert (shift <= 31); + shift++; + } + + return shift; +} + +/* Output a .ascii pseudo-op, keeping track of lengths. This is + because /bin/as is horribly restrictive. The judgement about + whether or not each character is 'printable' (and can be output as + is) or not (and must be printed with an octal escape) must be made + with reference to the *host* character set -- the situation is + similar to that discussed in the comments above pp_c_char in + c-pretty-print.c. */ + +#define MAX_ASCII_LEN 51 + +void +output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len) +{ + int i; + int len_so_far = 0; + + fputs ("\t.ascii\t\"", stream); + + for (i = 0; i < len; i++) + { + int c = p[i]; + + if (len_so_far >= MAX_ASCII_LEN) + { + fputs ("\"\n\t.ascii\t\"", stream); + len_so_far = 0; + } + + if (ISPRINT (c)) + { + if (c == '\\' || c == '\"') + { + putc ('\\', stream); + len_so_far++; + } + putc (c, stream); + len_so_far++; + } + else + { + fprintf (stream, "\\%03o", c); + len_so_far += 4; + } + } + + fputs ("\"\n", stream); +} + +/* Compute the register save mask for registers 0 through 12 + inclusive. This code is used by arm_compute_save_reg_mask. 
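A minimal standalone sketch of the shape of this mask (illustrative only, not part of the patch; example_core_save_mask, ever_live[] and call_used[] are hypothetical stand-ins for the dataflow queries used below):

    // Bit N of the result set means "rN must be saved".  This mirrors the
    // normal (non-interrupt) case below: save every call-saved register
    // that the function actually uses.
    unsigned long
    example_core_save_mask (const int ever_live[13], const int call_used[13])
    {
      unsigned long mask = 0;
      for (int reg = 0; reg <= 11; reg++)
        if (ever_live[reg] && !call_used[reg])
          mask |= 1UL << reg;
      return mask;
    }

In the interrupt case a non-leaf IRQ handler additionally saves the call-clobbered registers, so on a typical configuration the mask contains at least r0-r3 and r12, i.e. 0x100f, before any live call-saved registers are added.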
*/ + +static unsigned long +arm_compute_save_reg0_reg12_mask (void) +{ + unsigned long func_type = arm_current_func_type (); + unsigned long save_reg_mask = 0; + unsigned int reg; + + if (IS_INTERRUPT (func_type)) + { + unsigned int max_reg; + /* Interrupt functions must not corrupt any registers, + even call clobbered ones. If this is a leaf function + we can just examine the registers used by the RTL, but + otherwise we have to assume that whatever function is + called might clobber anything, and so we have to save + all the call-clobbered registers as well. */ + if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ) + /* FIQ handlers have registers r8 - r12 banked, so + we only need to check r0 - r7, Normal ISRs only + bank r14 and r15, so we must check up to r12. + r13 is the stack pointer which is always preserved, + so we do not need to consider it here. */ + max_reg = 7; + else + max_reg = 12; + + for (reg = 0; reg <= max_reg; reg++) + if (df_regs_ever_live_p (reg) + || (! crtl->is_leaf && call_used_regs[reg])) + save_reg_mask |= (1 << reg); + + /* Also save the pic base register if necessary. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + } + else if (IS_VOLATILE(func_type)) + { + /* For noreturn functions we historically omitted register saves + altogether. However this really messes up debugging. As a + compromise save just the frame pointers. Combined with the link + register saved elsewhere this should be sufficient to get + a backtrace. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM; + } + else + { + /* In the normal case we only need to save those registers + which are call saved and which are used by this function. */ + for (reg = 0; reg <= 11; reg++) + if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) + save_reg_mask |= (1 << reg); + + /* Handle the frame pointer as a special case. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + + /* If we aren't loading the PIC register, + don't stack it even though it may be live. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM) + || crtl->uses_pic_offset_table)) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* The prologue will copy SP into R0, so save it. */ + if (IS_STACKALIGN (func_type)) + save_reg_mask |= 1; + } + + /* Save registers so the exception handler can modify them. */ + if (crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0; ; i++) + { + reg = EH_RETURN_DATA_REGNO (i); + if (reg == INVALID_REGNUM) + break; + save_reg_mask |= 1 << reg; + } + } + + return save_reg_mask; +} + +/* Return true if r3 is live at the start of the function. */ + +static bool +arm_r3_live_at_start_p (void) +{ + /* Just look at cfg info, which is still close enough to correct at this + point. This gives false positives for broken functions that might use + uninitialized data that happens to be allocated in r3, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3); +} + +/* Compute the number of bytes used to store the static chain register on the + stack, above the stack frame. 
We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ + +static int +arm_compute_static_chain_stack_bytes (void) +{ + /* See the defining assertion in arm_expand_prologue. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM + && IS_NESTED (arm_current_func_type ()) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) + return 4; + + return 0; +} + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. + This is used by arm_get_frame_offsets, which may add extra registers. */ + +static unsigned long +arm_compute_save_reg_mask (void) +{ + unsigned int save_reg_mask = 0; + unsigned long func_type = arm_current_func_type (); + unsigned int reg; + + if (IS_NAKED (func_type)) + /* This should never really happen. */ + return 0; + + /* If we are creating a stack frame, then we must save the frame pointer, + IP (which will hold the old stack pointer), LR and the PC. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + save_reg_mask |= + (1 << ARM_HARD_FRAME_POINTER_REGNUM) + | (1 << IP_REGNUM) + | (1 << LR_REGNUM) + | (1 << PC_REGNUM); + + save_reg_mask |= arm_compute_save_reg0_reg12_mask (); + + /* Decide if we need to save the link register. + Interrupt routines have their own banked link register, + so they never need to save it. + Otherwise if we do not use the link register we do not need to save + it. If we are pushing other registers onto the stack however, we + can save an instruction in the epilogue by pushing the link register + now and then popping it back into the PC. This incurs extra memory + accesses though, so we only do it when optimizing for size, and only + if we know that we will not need a fancy return sequence. */ + if (df_regs_ever_live_p (LR_REGNUM) + || (save_reg_mask + && optimize_size + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return)) + save_reg_mask |= 1 << LR_REGNUM; + + if (cfun->machine->lr_save_eliminated) + save_reg_mask &= ~ (1 << LR_REGNUM); + + if (TARGET_REALLY_IWMMXT + && ((bit_count (save_reg_mask) + + ARM_NUM_INTS (crtl->args.pretend_args_size + + arm_compute_static_chain_stack_bytes()) + ) % 2) != 0) + { + /* The total number of registers that are going to be pushed + onto the stack is odd. We need to ensure that the stack + is 64-bit aligned before we start to save iWMMXt registers, + and also before we start to create locals. (A local variable + might be a double or long long which we will load/store using + an iWMMXt instruction). Therefore we need to push another + ARM register, so that the stack will be 64-bit aligned. We + try to avoid using the arg registers (r0 -r3) as they might be + used to pass values in a tail call. */ + for (reg = 4; reg <= 12; reg++) + if ((save_reg_mask & (1 << reg)) == 0) + break; + + if (reg <= 12) + save_reg_mask |= (1 << reg); + else + { + cfun->machine->sibcall_blocked = 1; + save_reg_mask |= (1 << 3); + } + } + + /* We may need to push an additional register for use initializing the + PIC base register. */ + if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic + && (save_reg_mask & THUMB2_WORK_REGS) == 0) + { + reg = thumb_find_work_register (1 << 4); + if (!call_used_regs[reg]) + save_reg_mask |= (1 << reg); + } + + return save_reg_mask; +} + + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. 
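(Illustrative note for the iWMMXt alignment rule in arm_compute_save_reg_mask above, not part of the patch: if {r4, r5, lr} are to be pushed and there are no pretend arguments, three words is odd, so one spare register, r6 in this case, is added to the mask purely to keep the iWMMXt save area and the locals 64-bit aligned; pushing {r4, r5, r6, lr} is already a multiple of eight bytes and needs no padding.)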
*/ +static unsigned long +thumb1_compute_save_reg_mask (void) +{ + unsigned long mask; + unsigned reg; + + mask = 0; + for (reg = 0; reg < 12; reg ++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + mask |= 1 << reg; + + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */ + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + + /* LR will also be pushed if any lo regs are pushed. */ + if (mask & 0xff || thumb_force_lr_save ()) + mask |= (1 << LR_REGNUM); + + /* Make sure we have a low work register if we need one. + We will need one if we are going to push a high register, + but we are not currently intending to push a low register. */ + if ((mask & 0xff) == 0 + && ((mask & 0x0f00) || TARGET_BACKTRACE)) + { + /* Use thumb_find_work_register to choose which register + we will use. If the register is live then we will + have to push it. Use LAST_LO_REGNUM as our fallback + choice for the register to select. */ + reg = thumb_find_work_register (1 << LAST_LO_REGNUM); + /* Make sure the register returned by thumb_find_work_register is + not part of the return value. */ + if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ()) + reg = LAST_LO_REGNUM; + + if (! call_used_regs[reg]) + mask |= 1 << reg; + } + + /* The 504 below is 8 bytes less than 512 because there are two possible + alignment words. We can't tell here if they will be present or not so we + have to play it safe and assume that they are. */ + if ((CALLER_INTERWORKING_SLOT_SIZE + + ROUND_UP_WORD (get_frame_size ()) + + crtl->outgoing_args_size) >= 504) + { + /* This is the same as the code in thumb1_expand_prologue() which + determines which register to use for stack decrement. */ + for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++) + if (mask & (1 << reg)) + break; + + if (reg > LAST_LO_REGNUM) + { + /* Make sure we have a register available for stack decrement. */ + mask |= 1 << LAST_LO_REGNUM; + } + } + + return mask; +} + + +/* Return the number of bytes required to save VFP registers. */ +static int +arm_get_vfp_saved_size (void) +{ + unsigned int regno; + int count; + int saved; + + saved = 0; + /* Space for saved VFP registers. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + count = 0; + for (regno = FIRST_VFP_REGNUM; + regno < LAST_VFP_REGNUM; + regno += 2) + { + if ((!df_regs_ever_live_p (regno) || call_used_regs[regno]) + && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1])) + { + if (count > 0) + { + /* Workaround ARM10 VFPr1 bug. */ + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + count = 0; + } + else + count++; + } + if (count > 0) + { + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + } + return saved; +} + + +/* Generate a function exit sequence. If REALLY_RETURN is false, then do + everything bar the final return instruction. If simple_return is true, + then do not output epilogue, because it has already been emitted in RTL. 
*/ +const char * +output_return_instruction (rtx operand, bool really_return, bool reverse, + bool simple_return) +{ + char conditional[10]; + char instr[100]; + unsigned reg; + unsigned long live_regs_mask; + unsigned long func_type; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + if (IS_NAKED (func_type)) + return ""; + + if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) + { + /* If this function was declared non-returning, and we have + found a tail call, then we have to trust that the called + function won't return. */ + if (really_return) + { + rtx ops[2]; + + /* Otherwise, trap an attempted return by aborting. */ + ops[0] = operand; + ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" + : "abort"); + assemble_external_libcall (ops[1]); + output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops); + } + + return ""; + } + + gcc_assert (!cfun->calls_alloca || really_return); + + sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); + + cfun->machine->return_used_this_function = 1; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + if (!simple_return && live_regs_mask) + { + const char * return_reg; + + /* If we do not have any special requirements for function exit + (e.g. interworking) then we can load the return address + directly into the PC. Otherwise we must load it into LR. */ + if (really_return + && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) + return_reg = reg_names[PC_REGNUM]; + else + return_reg = reg_names[LR_REGNUM]; + + if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM)) + { + /* There are three possible reasons for the IP register + being saved. 1) a stack frame was created, in which case + IP contains the old stack pointer, or 2) an ISR routine + corrupted it, or 3) it was saved to align the stack on + iWMMXt. In case 1, restore IP into SP, otherwise just + restore IP. */ + if (frame_pointer_needed) + { + live_regs_mask &= ~ (1 << IP_REGNUM); + live_regs_mask |= (1 << SP_REGNUM); + } + else + gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); + } + + /* On some ARM architectures it is faster to use LDR rather than + LDM to load a single register. On other architectures, the + cost is the same. In 26 bit mode, or for exception handlers, + we have to use LDM to load the PC so that the CPSR is also + restored. */ + for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) + if (live_regs_mask == (1U << reg)) + break; + + if (reg <= LAST_ARM_REGNUM + && (reg != LR_REGNUM + || ! really_return + || ! IS_INTERRUPT (func_type))) + { + sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional, + (reg == LR_REGNUM) ? return_reg : reg_names[reg]); + } + else + { + char *p; + int first = 1; + + /* Generate the load multiple instruction to restore the + registers. Note we can get here, even if + frame_pointer_needed is true, but only if sp already + points to the base of the saved core registers. */ + if (live_regs_mask & (1 << SP_REGNUM)) + { + unsigned HOST_WIDE_INT stack_adjust; + + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + gcc_assert (stack_adjust == 0 || stack_adjust == 4); + + if (stack_adjust && arm_arch5 && TARGET_ARM) + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmib%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sib\t%%|sp, {", conditional); + else + { + /* If we can't use ldmib (SA110 bug), + then try to pop r3 instead. 
*/ + if (stack_adjust) + live_regs_mask |= 1 << 3; + + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); + } + } + else + if (TARGET_UNIFIED_ASM) + sprintf (instr, "pop%s\t{", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); + + p = instr + strlen (instr); + + for (reg = 0; reg <= SP_REGNUM; reg++) + if (live_regs_mask & (1 << reg)) + { + int l = strlen (reg_names[reg]); + + if (first) + first = 0; + else + { + memcpy (p, ", ", 2); + p += 2; + } + + memcpy (p, "%|", 2); + memcpy (p + 2, reg_names[reg], l); + p += l + 2; + } + + if (live_regs_mask & (1 << LR_REGNUM)) + { + sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg); + /* If returning from an interrupt, restore the CPSR. */ + if (IS_INTERRUPT (func_type)) + strcat (p, "^"); + } + else + strcpy (p, "}"); + } + + output_asm_insn (instr, & operand); + + /* See if we need to generate an extra instruction to + perform the actual function return. */ + if (really_return + && func_type != ARM_FT_INTERWORKED + && (live_regs_mask & (1 << LR_REGNUM)) != 0) + { + /* The return has already been handled + by loading the LR into the PC. */ + return ""; + } + } + + if (really_return) + { + switch ((int) ARM_FUNC_TYPE (func_type)) + { + case ARM_FT_ISR: + case ARM_FT_FIQ: + /* ??? This is wrong for unified assembly syntax. */ + sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional); + break; + + case ARM_FT_INTERWORKED: + sprintf (instr, "bx%s\t%%|lr", conditional); + break; + + case ARM_FT_EXCEPTION: + /* ??? This is wrong for unified assembly syntax. */ + sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional); + break; + + default: + /* Use bx if it's available. */ + if (arm_arch5 || arm_arch4t) + sprintf (instr, "bx%s\t%%|lr", conditional); + else + sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional); + break; + } + + output_asm_insn (instr, & operand); + } + + return ""; +} + +/* Write the function name into the code section, directly preceding + the function prologue. + + Code will be output similar to this: + t0 + .ascii "arm_poke_function_name", 0 + .align + t1 + .word 0xff000000 + (t1 - t0) + arm_poke_function_name + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} + sub fp, ip, #4 + + When performing a stack backtrace, code can inspect the value + of 'pc' stored at 'fp' + 0. If the trace function then looks + at location pc - 12 and the top 8 bits are set, then we know + that there is a function name embedded immediately preceding this + location and has length ((pc[-3]) & 0xff000000). + + We assume that pc is declared as a pointer to an unsigned long. + + It is of no benefit to output the function name if we are assembling + a leaf function. These function types will not contain a stack + backtrace structure, therefore it is not possible to determine the + function name. */ +void +arm_poke_function_name (FILE *stream, const char *name) +{ + unsigned long alignlength; + unsigned long length; + rtx x; + + length = strlen (name) + 1; + alignlength = ROUND_UP_WORD (length); + + ASM_OUTPUT_ASCII (stream, name, length); + ASM_OUTPUT_ALIGN (stream, 2); + x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength); + assemble_aligned_integer (UNITS_PER_WORD, x); +} + +/* Place some comments into the assembler stream + describing the current function. */ +static void +arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) +{ + unsigned long func_type; + + /* ??? Do we want to print some of the below anyway? 
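A minimal sketch of how a backtrace routine might consume the marker emitted by arm_poke_function_name above (illustrative only, not part of the patch; poked_name and entry are hypothetical names, and entry is assumed to already be the address of the function's first instruction, however the unwinder derived it):

    #include <stddef.h>

    // The word immediately before the first instruction is
    // 0xff000000 + padded_length; the name string (NUL terminated and
    // padded to a word boundary) sits immediately before that word.
    static const char *
    poked_name (const unsigned long *entry)
    {
      unsigned long marker = entry[-1];
      if ((marker & 0xff000000UL) != 0xff000000UL)
        return NULL;                      // no name was poked here
      return (const char *) entry - 4 - (marker & 0x00ffffffUL);
    }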
*/ + if (TARGET_THUMB1) + return; + + /* Sanity check. */ + gcc_assert (!arm_ccfsm_state && !arm_target_insn); + + func_type = arm_current_func_type (); + + switch ((int) ARM_FUNC_TYPE (func_type)) + { + default: + case ARM_FT_NORMAL: + break; + case ARM_FT_INTERWORKED: + asm_fprintf (f, "\t%@ Function supports interworking.\n"); + break; + case ARM_FT_ISR: + asm_fprintf (f, "\t%@ Interrupt Service Routine.\n"); + break; + case ARM_FT_FIQ: + asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n"); + break; + case ARM_FT_EXCEPTION: + asm_fprintf (f, "\t%@ ARM Exception Handler.\n"); + break; + } + + if (IS_NAKED (func_type)) + asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n"); + + if (IS_VOLATILE (func_type)) + asm_fprintf (f, "\t%@ Volatile: function does not return.\n"); + + if (IS_NESTED (func_type)) + asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n"); + if (IS_STACKALIGN (func_type)) + asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n"); + + asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n", + crtl->args.size, + crtl->args.pretend_args_size, frame_size); + + asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n", + frame_pointer_needed, + cfun->machine->uses_anonymous_args); + + if (cfun->machine->lr_save_eliminated) + asm_fprintf (f, "\t%@ link register save eliminated.\n"); + + if (crtl->calls_eh_return) + asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n"); + +} + +static void +arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED) +{ + arm_stack_offsets *offsets; + + if (TARGET_THUMB1) + { + int regno; + + /* Emit any call-via-reg trampolines that are needed for v4t support + of call_reg and call_value_reg type insns. */ + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = cfun->machine->call_via[regno]; + + if (label != NULL) + { + switch_to_section (function_section (current_function_decl)); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } + + /* ??? Probably not safe to set this here, since it assumes that a + function will be emitted as assembly immediately after we generate + RTL for it. This does not happen for inline functions. */ + cfun->machine->return_used_this_function = 0; + } + else /* TARGET_32BIT */ + { + /* We need to take into account any stack-frame rounding. */ + offsets = arm_get_frame_offsets (); + + gcc_assert (!use_return_insn (FALSE, NULL) + || (cfun->machine->return_used_this_function != 0) + || offsets->saved_regs == offsets->outgoing_args + || frame_pointer_needed); + + /* Reset the ARM-specific per-function variables. */ + after_arm_reorg = 0; + } +} + +/* Generate and emit a sequence of insns equivalent to PUSH, but using + STR and STRD. If an even number of registers are being pushed, one + or more STRD patterns are created for each register pair. If an + odd number of registers are pushed, emit an initial STR followed by + as many STRD instructions as are needed. This works best when the + stack is initially 64-bit aligned (the normal case), since it + ensures that each STRD is also 64-bit aligned. 
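For example (illustrative, not part of the patch), pushing {r4, r5, r6} emits roughly

    str   r4, [sp, #-12]!
    strd  r5, r6, [sp, #4]

so the single STR both stores the odd register and allocates the whole 12-byte area, leaving the following STRD on a 64-bit boundary.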
*/ +static void +thumb2_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i; + int regno; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp; + bool first = true; + + num_regs = bit_count (saved_regs_mask); + + /* Must be at least one register to save, and can't save SP or PC. */ + gcc_assert (num_regs > 0 && num_regs <= 14); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + + /* Create sequence for DWARF info. All the frame-related data for + debugging is held in this wrapper. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* Describe the stack adjustment. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + /* Find the first register. */ + for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++) + ; + + i = 0; + + /* If there's an odd number of registers to push. Start off by + pushing a single register. This ensures that subsequent strd + operations are dword aligned (assuming that SP was originally + 64-bit aligned). */ + if ((num_regs & 1) != 0) + { + rtx reg, mem, insn; + + reg = gen_rtx_REG (SImode, regno); + if (num_regs == 1) + mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + else + mem = gen_frame_mem (Pmode, + gen_rtx_PRE_MODIFY + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * num_regs))); + + tmp = gen_rtx_SET (VOIDmode, mem, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + insn = emit_insn (tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + i++; + regno++; + XVECEXP (dwarf, 0, i) = tmp; + first = false; + } + + while (i < num_regs) + if (saved_regs_mask & (1 << regno)) + { + rtx reg1, reg2, mem1, mem2; + rtx tmp0, tmp1, tmp2; + int regno2; + + /* Find the register to pair with this one. 
*/ + for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0; + regno2++) + ; + + reg1 = gen_rtx_REG (SImode, regno); + reg2 = gen_rtx_REG (SImode, regno2); + + if (first) + { + rtx insn; + + first = false; + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * num_regs)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * (num_regs - 1))); + tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * (num_regs))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp0) = 1; + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3)); + XVECEXP (par, 0, 0) = tmp0; + XVECEXP (par, 0, 1) = tmp1; + XVECEXP (par, 0, 2) = tmp2; + insn = emit_insn (par); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = tmp1; + XVECEXP (par, 0, 1) = tmp2; + emit_insn (par); + } + + /* Create unwind information. This is an approximation. */ + tmp1 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)), + reg1); + tmp2 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))), + reg2); + + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp1; + XVECEXP (dwarf, 0, i + 2) = tmp2; + i += 2; + regno = regno2 + 1; + } + else + regno++; + + return; +} + +/* STRD in ARM mode requires consecutive registers. This function emits STRD + whenever possible, otherwise it emits single-word stores. The first store + also allocates stack space for all saved registers, using writeback with + post-addressing mode. All other stores use offset addressing. If no STRD + can be emitted, this function emits a sequence of single-word stores, + and not an STM as before, because single-word stores provide more freedom + scheduling and can be turned into an STM by peephole optimizations. */ +static void +arm_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j, dwarf_index = 0; + int offset = 0; + rtx dwarf = NULL_RTX; + rtx insn = NULL_RTX; + rtx tmp, mem; + + /* TODO: A more efficient code can be emitted by changing the + layout, e.g., first push all pairs that can use STRD to keep the + stack aligned, and then push all other registers. */ + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + gcc_assert (num_regs > 0); + + /* Create sequence for DWARF info. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* For dwarf info, we generate explicit stack update. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + /* Save registers. 
*/ + offset = - 4 * num_regs; + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2 == 0) + && (saved_regs_mask & (1 << (j + 1)))) + { + /* Current register and previous register form register pair for + which STRD can be generated. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (DImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset + 4)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 8; + j += 2; + } + else + { + /* Emit a single word store. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (SImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant(Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 4; + j += 1; + } + } + else + j++; + + /* Attach dwarf info to the first insn we generate. */ + gcc_assert (insn != NULL_RTX); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate and emit an insn that we will recognize as a push_multi. + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of + MASK for registers that should be annotated for DWARF2 frame unwind + information. */ +static rtx +emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask) +{ + int num_regs = 0; + int num_dwarf_regs = 0; + int i, j; + rtx par; + rtx dwarf; + int dwarf_par_index; + rtx tmp, reg; + + /* We don't record the PC in the dwarf frame information. 
*/ + dwarf_regs_mask &= ~(1 << PC_REGNUM); + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + { + if (mask & (1 << i)) + num_regs++; + if (dwarf_regs_mask & (1 << i)) + num_dwarf_regs++; + } + + gcc_assert (num_regs && num_regs <= 16); + gcc_assert ((dwarf_regs_mask & ~mask) == 0); + + /* For the body of the insn we are going to generate an UNSPEC in + parallel with several USEs. This allows the insn to be recognized + by the push_multi pattern in the arm.md file. + + The body of the insn looks something like this: + + (parallel [ + (set (mem:BLK (pre_modify:SI (reg:SI sp) + (const_int:SI ))) + (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) + (use (reg:SI XX)) + (use (reg:SI YY)) + ... + ]) + + For the frame note however, we try to be more explicit and actually + show each register being stored into the stack frame, plus a (single) + decrement of the stack pointer. We do it this way in order to be + friendly to the stack unwinding code, which only wants to see a single + stack decrement per instruction. The RTL we generate for the note looks + something like this: + + (sequence [ + (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) + (set (mem:SI (reg:SI sp)) (reg:SI r4)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) + ... + ]) + + FIXME:: In an ideal world the PRE_MODIFY would not exist and + instead we'd have a parallel expression detailing all + the stores to the various memory addresses so that debug + information is more up-to-date. Remember however while writing + this to take care of the constraints with the push instruction. + + Note also that this has to be taken care of for the VFP registers. + + For more see PR43399. */ + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); + dwarf_par_index = 1; + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (Pmode, stack_pointer_rtx, + -4 * num_regs)) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + if (dwarf_regs_mask & (1 << i)) + { + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (SImode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; + } + + break; + } + } + + for (j = 1, i++; j < num_regs; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg); + + if (dwarf_regs_mask & (1 << i)) + { + tmp + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (SImode, + plus_constant (Pmode, stack_pointer_rtx, + 4 * j)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; + } + + j++; + } + } + + par = emit_insn (par); + + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + + return par; +} + +/* Add a REG_CFA_ADJUST_CFA REG note to INSN. + SIZE is the offset to be adjusted. + DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. 
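(Illustrative instance of the note described above, not part of the patch: for a mask of {r4, r5, lr} the insn itself is the PARALLEL with a pre_modify of sp by -12, the UNSPEC_PUSH_MULT of r4 and two USEs, which assembles to a single push {r4, r5, lr}, while the attached REG_FRAME_RELATED_EXPR sequence spells out sp = sp - 12, [sp] = r4, [sp + 4] = r5 and [sp + 8] = lr for the unwinder.)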
*/ +static void +arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src) +{ + rtx dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf); +} + +/* Generate and emit an insn pattern that we will recognize as a pop_multi. + SAVED_REGS_MASK shows which registers need to be restored. + + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static void +arm_emit_multi_reg_pop (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j; + rtx par; + rtx dwarf = NULL_RTX; + rtx tmp, reg; + bool return_in_pc; + int offset_adj; + int emit_update; + + return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false; + offset_adj = return_in_pc ? 1 : 0; + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* If SP is in reglist, then we don't emit SP update insn. */ + emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1; + + /* The parallel needs to hold num_regs SETs + and one SET for the stack update. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj)); + + if (return_in_pc) + { + tmp = ret_rtx; + XVECEXP (par, 0, 0) = tmp; + } + + if (emit_update) + { + /* Increment the stack pointer, based on there being + num_regs 4-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, offset_adj) = tmp; + } + + /* Now restore every reg, which may include PC. */ + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + if ((num_regs == 1) && emit_update && !return_in_pc) + { + /* Emit single load with writeback. */ + tmp = gen_frame_mem (SImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp)); + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + return; + } + + tmp = gen_rtx_SET (VOIDmode, + reg, + gen_frame_mem + (SImode, + plus_constant (Pmode, stack_pointer_rtx, 4 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, j + emit_update + offset_adj) = tmp; + + /* We need to maintain a sequence for DWARF info too. As dwarf info + should not have PC, skip PC. */ + if (i != PC_REGNUM) + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + j++; + } + + if (return_in_pc) + par = emit_jump_insn (par); + else + par = emit_insn (par); + + REG_NOTES (par) = dwarf; + if (!return_in_pc) + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, + stack_pointer_rtx, stack_pointer_rtx); +} + +/* Generate and emit an insn pattern that we will recognize as a pop_multi + of NUM_REGS consecutive VFP regs, starting at FIRST_REG. + + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static void +arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) +{ + int i, j; + rtx par; + rtx dwarf = NULL_RTX; + rtx tmp, reg; + + gcc_assert (num_regs && num_regs <= 32); + + /* Workaround ARM10 VFPr1 bug. 
*/ + if (num_regs == 2 && !arm_arch6) + { + if (first_reg == 15) + first_reg--; + + num_regs++; + } + + /* We can emit at most 16 D-registers in a single pop_multi instruction, and + there could be up to 32 D-registers to restore. + If there are more than 16 D-registers, make two recursive calls, + each of which emits one pop_multi instruction. */ + if (num_regs > 16) + { + arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg); + arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg); + return; + } + + /* The parallel needs to hold num_regs SETs + and one SET for the stack update. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* Increment the stack pointer, based on there being + num_regs 8-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + base_reg, + plus_constant (Pmode, base_reg, 8 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 0) = tmp; + + /* Now show every reg that will be restored, using a SET for each. */ + for (j = 0, i=first_reg; j < num_regs; i += 2) + { + reg = gen_rtx_REG (DFmode, i); + + tmp = gen_rtx_SET (VOIDmode, + reg, + gen_frame_mem + (DFmode, + plus_constant (Pmode, base_reg, 8 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, j + 1) = tmp; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + j++; + } + + par = emit_insn (par); + REG_NOTES (par) = dwarf; + + /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */ + if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM) + { + RTX_FRAME_RELATED_P (par) = 1; + add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx); + } + else + arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs, + base_reg, base_reg); +} + +/* Generate and emit a pattern that will be recognized as LDRD pattern. If even + number of registers are being popped, multiple LDRD patterns are created for + all register pairs. If odd number of registers are popped, last register is + loaded by using LDR pattern. */ +static void +thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, reg, tmp1; + bool return_in_pc; + + return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false; + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* We cannot generate ldrd for PC. Hence, reduce the count if PC is + to be popped. So, if num_regs is even, now it will become odd, + and we can generate pop with PC. If num_regs is odd, it will be + even now, and ldr with return can be generated for PC. */ + if (return_in_pc) + num_regs--; + + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + + /* Var j iterates over all the registers to gather all the registers in + saved_regs_mask. Var i gives index of saved registers in stack frame. + A PARALLEL RTX of register-pair is created here, so that pattern for + LDRD can be matched. As PC is always last register to be popped, and + we have already decremented num_regs if PC, we don't have to worry + about PC in this loop. */ + for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++) + if (saved_regs_mask & (1 << j)) + { + /* Create RTX for memory load. 
*/ + reg = gen_rtx_REG (SImode, j); + tmp = gen_rtx_SET (SImode, + reg, + gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, 4 * i))); + RTX_FRAME_RELATED_P (tmp) = 1; + + if (i % 2 == 0) + { + /* When saved-register index (i) is even, the RTX to be emitted is + yet to be created. Hence create it first. The LDRD pattern we + are generating is : + [ (SET (reg_t0) (MEM (PLUS (SP) (NUM)))) + (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ] + where target registers need not be consecutive. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + dwarf = NULL_RTX; + } + + /* ith register is added in PARALLEL RTX. If i is even, the reg_i is + added as 0th element and if i is odd, reg_i is added as 1st element + of LDRD pattern shown above. */ + XVECEXP (par, 0, (i % 2)) = tmp; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + if ((i % 2) == 1) + { + /* When saved-register index (i) is odd, RTXs for both the registers + to be loaded are generated in above given LDRD pattern, and the + pattern can be emitted now. */ + par = emit_insn (par); + REG_NOTES (par) = dwarf; + RTX_FRAME_RELATED_P (par) = 1; + } + + i++; + } + + /* If the number of registers pushed is odd AND return_in_pc is false OR + number of registers are even AND return_in_pc is true, last register is + popped using LDR. It can be PC as well. Hence, adjust the stack first and + then LDR with post increment. */ + + /* Increment the stack pointer, based on there being + num_regs 4-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, 4 * i)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + if (!return_in_pc) + { + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i, + stack_pointer_rtx, stack_pointer_rtx); + } + + dwarf = NULL_RTX; + + if (((num_regs % 2) == 1 && !return_in_pc) + || ((num_regs % 2) == 0 && return_in_pc)) + { + /* Scan for the single register to be popped. Skip until the saved + register is found. */ + for (; (saved_regs_mask & (1 << j)) == 0; j++); + + /* Gen LDR with post increment here. */ + tmp1 = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (tmp1, get_frame_alias_set ()); + + reg = gen_rtx_REG (SImode, j); + tmp = gen_rtx_SET (SImode, reg, tmp1); + RTX_FRAME_RELATED_P (tmp) = 1; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + if (return_in_pc) + { + /* If return_in_pc, j must be PC_REGNUM. */ + gcc_assert (j == PC_REGNUM); + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + } + else + { + par = emit_insn (tmp); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } + + } + else if ((num_regs % 2) == 1 && return_in_pc) + { + /* There are 2 registers to be popped. So, generate the pattern + pop_multiple_with_stack_update_and_return to pop in PC. */ + arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1))); + } + + return; +} + +/* LDRD in ARM mode needs consecutive registers as operands. This function + emits LDRD whenever possible, otherwise it emits single-word loads. It uses + offset addressing and then generates one separate stack udpate. This provides + more scheduling freedom, compared to writeback on every load. However, + if the function returns using load into PC directly + (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated + before the last load. 
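As a rough illustration (not part of the patch), with SAVED_REGS_MASK covering {r4, r5, r6, r7, r8, pc} this emits approximately

    ldrd  r4, r5, [sp]
    ldrd  r6, r7, [sp, #8]
    ldr   r8, [sp, #16]
    add   sp, sp, #20
    ldr   pc, [sp], #4

with the final post-indexed load doubling as the return.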
TODO: Add a peephole optimization to recognize + the new epilogue sequence as an LDM instruction whenever possible. TODO: Add + peephole optimization to merge the load at stack-offset zero + with the stack update instruction using load with writeback + in post-index addressing mode. */ +static void +arm_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int j = 0; + int offset = 0; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, mem; + + /* Restore saved registers. */ + gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM)))); + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2) == 0 + && (saved_regs_mask & (1 << (j + 1))) + && (j + 1) != PC_REGNUM) + { + /* Current register and next register form register pair for which + LDRD can be generated. PC is always the last register popped, and + we handle it separately. */ + if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j + 1), + dwarf); + + REG_NOTES (tmp) = dwarf; + + offset += 8; + j += 2; + } + else if (j != PC_REGNUM) + { + /* Emit a single word load. */ + if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + + offset += 4; + j += 1; + } + else /* j == PC_REGNUM */ + j++; + } + else + j++; + + /* Update the stack. */ + if (offset > 0) + { + tmp = gen_rtx_SET (Pmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = emit_insn (tmp); + arm_add_cfa_adjust_cfa_note (tmp, offset, + stack_pointer_rtx, stack_pointer_rtx); + offset = 0; + } + + if (saved_regs_mask & (1 << PC_REGNUM)) + { + /* Only PC is to be popped. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + tmp = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, PC_REGNUM), + gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + + /* Generate dwarf info. */ + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, PC_REGNUM), + NULL_RTX); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } +} + +/* Calculate the size of the return value that is passed in registers. */ +static unsigned +arm_size_return_regs (void) +{ + enum machine_mode mode; + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + return GET_MODE_SIZE (mode); +} + +/* Return true if the current function needs to save/restore LR. 
*/ +static bool +thumb_force_lr_save (void) +{ + return !cfun->machine->lr_save_eliminated + && (!leaf_function_p () + || thumb_far_jump_used_p () + || df_regs_ever_live_p (LR_REGNUM)); +} + +/* We do not know if r3 will be available because + there is an indirect tailcall happening in this + particular case. */ +static bool +is_indirect_tailcall_p (rtx call) +{ + rtx pat = PATTERN (call); + + /* Indirect tail call. */ + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + + pat = XEXP (XEXP (pat, 0), 0); + return REG_P (pat); +} + +/* Return true if r3 is used by any of the tail call insns in the + current function. */ +static bool +any_sibcall_could_use_r3 (void) +{ + edge_iterator ei; + edge e; + + if (!crtl->tail_call_emit) + return false; + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if (e->flags & EDGE_SIBCALL) + { + rtx call = BB_END (e->src); + if (!CALL_P (call)) + call = prev_nonnote_nondebug_insn (call); + gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); + if (find_regno_fusage (call, USE, 3) + || is_indirect_tailcall_p (call)) + return true; + } + return false; +} + + +/* Compute the distance from register FROM to register TO. + These can be the arg pointer (26), the soft frame pointer (25), + the stack pointer (13) or the hard frame pointer (11). + In thumb mode r7 is used as the soft frame pointer, if needed. + A typical stack layout looks like this: + + old stack pointer -> | | + ---- + | | \ + | | saved arguments for + | | vararg functions + | | / + -- + hard FP & arg pointer -> | | \ + | | stack + | | frame + | | / + -- + | | \ + | | call saved + | | registers + soft frame pointer -> | | / + -- + | | \ + | | local + | | variables + locals base pointer -> | | / + -- + | | \ + | | outgoing + | | arguments + current stack pointer -> | | / + -- + + For a given function some or all of these stack components + may not be needed, giving rise to the possibility of + eliminating some of the registers. + + The values returned by this function must reflect the behavior + of arm_expand_prologue() and arm_compute_save_reg_mask(). + + The sign of the number returned reflects the direction of stack + growth, so the values are positive for all eliminations except + from the soft frame pointer to the hard frame pointer. + + SFP may point just inside the local variables block to ensure correct + alignment. */ + + +/* Calculate stack offsets. These are used to calculate register elimination + offsets and in prologue/epilogue code. Also calculates which registers + should be saved. */ + +static arm_stack_offsets * +arm_get_frame_offsets (void) +{ + struct arm_stack_offsets *offsets; + unsigned long func_type; + int leaf; + int saved; + int core_saved; + HOST_WIDE_INT frame_size; + int i; + + offsets = &cfun->machine->stack_offsets; + + /* We need to know if we are a leaf function. Unfortunately, it + is possible to be called after start_sequence has been called, + which causes get_insns to return the insns for the sequence, + not the function, which will cause leaf_function_p to return + the incorrect result. + + To work around this, we cache the computed frame size. This + works because we will only be calling RTL expanders that need + to know about leaf functions once reload has completed, and the + frame size cannot be changed after that time, so we can safely + use the cached value. */ + + if (reload_completed) + return offsets; + + /* Initially this is the size of the local variables. It will be translated + into an offset once we have determined the size of preceding data. 
*/ + frame_size = ROUND_UP_WORD (get_frame_size ()); + + leaf = leaf_function_p (); + + /* Space for variadic functions. */ + offsets->saved_args = crtl->args.pretend_args_size; + + /* In Thumb mode this is incorrect, but never used. */ + offsets->frame + = (offsets->saved_args + + arm_compute_static_chain_stack_bytes () + + (frame_pointer_needed ? 4 : 0)); + + if (TARGET_32BIT) + { + unsigned int regno; + + offsets->saved_regs_mask = arm_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + + /* We know that SP will be doubleword aligned on entry, and we must + preserve that condition at any subroutine call. We also require the + soft frame pointer to be doubleword aligned. */ + + if (TARGET_REALLY_IWMMXT) + { + /* Check for the call-saved iWMMXt registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; + regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + saved += 8; + } + + func_type = arm_current_func_type (); + /* Space for saved VFP registers. */ + if (! IS_VOLATILE (func_type) + && TARGET_HARD_FLOAT && TARGET_VFP) + saved += arm_get_vfp_saved_size (); + } + else /* TARGET_THUMB1 */ + { + offsets->saved_regs_mask = thumb1_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + if (TARGET_BACKTRACE) + saved += 16; + } + + /* Saved registers include the stack frame. */ + offsets->saved_regs + = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved; + offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE; + + /* A leaf function does not need any stack alignment if it has nothing + on the stack. */ + if (leaf && frame_size == 0 + /* However if it calls alloca(), we have a dynamically allocated + block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */ + && ! cfun->calls_alloca) + { + offsets->outgoing_args = offsets->soft_frame; + offsets->locals_base = offsets->soft_frame; + return offsets; + } + + /* Ensure SFP has the correct alignment. */ + if (ARM_DOUBLEWORD_ALIGN + && (offsets->soft_frame & 7)) + { + offsets->soft_frame += 4; + /* Try to align stack by pushing an extra reg. Don't bother doing this + when there is a stack frame as the alignment will be rolled into + the normal stack adjustment. */ + if (frame_size + crtl->outgoing_args_size == 0) + { + int reg = -1; + + /* If it is safe to use r3, then do so. This sometimes + generates better code on Thumb-2 by avoiding the need to + use 32-bit push/pop instructions. */ + if (! any_sibcall_could_use_r3 () + && arm_size_return_regs () <= 12 + && (offsets->saved_regs_mask & (1 << 3)) == 0 + && (TARGET_THUMB2 + || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) + { + reg = 3; + } + else + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) + { + /* Avoid fixed registers; they may be changed at + arbitrary times so it's unsafe to restore them + during the epilogue. */ + if (!fixed_regs[i] + && (offsets->saved_regs_mask & (1 << i)) == 0) + { + reg = i; + break; + } + } + + if (reg != -1) + { + offsets->saved_regs += 4; + offsets->saved_regs_mask |= (1 << reg); + } + } + } + + offsets->locals_base = offsets->soft_frame + frame_size; + offsets->outgoing_args = (offsets->locals_base + + crtl->outgoing_args_size); + + if (ARM_DOUBLEWORD_ALIGN) + { + /* Ensure SP remains doubleword aligned. 
*/ + if (offsets->outgoing_args & 7) + offsets->outgoing_args += 4; + gcc_assert (!(offsets->outgoing_args & 7)); + } + + return offsets; +} + + +/* Calculate the relative offsets for the different stack pointers. Positive + offsets are in the direction of stack growth. */ + +HOST_WIDE_INT +arm_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + /* OK, now we have enough information to compute the distances. + There must be an entry in these switch tables for each pair + of registers in ELIMINABLE_REGS, even if some of the entries + seem to be redundant or useless. */ + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case FRAME_POINTER_REGNUM: + /* This is the reverse of the soft frame pointer + to hard frame pointer elimination below. */ + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* This is only non-zero in the case where the static chain register + is stored above the frame. */ + return offsets->frame - offsets->saved_args - 4; + + case STACK_POINTER_REGNUM: + /* If nothing has been pushed on the stack at all + then this will return -4. This *is* correct! */ + return offsets->outgoing_args - (offsets->saved_args + 4); + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case FRAME_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* The hard frame pointer points to the top entry in the + stack frame. The soft frame pointer to the bottom entry + in the stack frame. If there is no stack frame at all, + then they are identical. */ + + return offsets->frame - offsets->soft_frame; + + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + default: + /* You cannot eliminate from the stack pointer. + In theory you could eliminate from the hard frame + pointer to the stack pointer, but this will never + happen, since if a stack frame is not needed the + hard frame pointer will never be used. */ + gcc_unreachable (); + } +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All eliminations are permissible. Note that ARG_POINTER_REGNUM and + HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame + pointer, we must eliminate FRAME_POINTER_REGNUM into + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or + ARG_POINTER_REGNUM. */ + +bool +arm_can_eliminate (const int from, const int to) +{ + return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false : + (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : + (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false : + (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : + true); +} + +/* Emit RTL to save coprocessor registers on function entry. Returns the + number of bytes pushed. */ + +static int +arm_save_coproc_regs(void) +{ + int saved_size = 0; + unsigned reg; + unsigned start_reg; + rtx insn; + + for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && ! 
call_used_regs[reg]) + { + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = gen_rtx_MEM (V2SImode, insn); + insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += 8; + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + start_reg = FIRST_VFP_REGNUM; + + for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) + { + if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) + && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) + { + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + start_reg = reg + 2; + } + } + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + } + return saved_size; +} + + +/* Set the Thumb frame pointer from the stack pointer. */ + +static void +thumb_set_frame_pointer (arm_stack_offsets *offsets) +{ + HOST_WIDE_INT amount; + rtx insn, dwarf; + + amount = offsets->outgoing_args - offsets->locals_base; + if (amount < 1024) + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, GEN_INT (amount))); + else + { + emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount))); + /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1 + expects the first two operands to be the same. */ + if (TARGET_THUMB2) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + stack_pointer_rtx)); + } + dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, amount)); + RTX_FRAME_RELATED_P (dwarf) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate the prologue instructions for entry into an ARM or Thumb-2 + function. */ +void +arm_expand_prologue (void) +{ + rtx amount; + rtx insn; + rtx ip_rtx; + unsigned long live_regs_mask; + unsigned long func_type; + int fp_offset = 0; + int saved_pretend_args = 0; + int saved_regs = 0; + unsigned HOST_WIDE_INT args_to_push; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + /* Make a copy of c_f_p_a_s as we may need to modify it locally. */ + args_to_push = crtl->args.pretend_args_size; + + /* Compute which register we will have to save onto the stack. */ + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + + if (IS_STACKALIGN (func_type)) + { + rtx r0, r1; + + /* Handle a word-aligned stack pointer. We generate the following: + + mov r0, sp + bic r1, r0, #7 + mov sp, r1 + + mov sp, r0 + bx lr + + The unwinder doesn't need to know about the stack realignment. + Just tell it we saved SP in r0. */ + gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0); + + r0 = gen_rtx_REG (SImode, 0); + r1 = gen_rtx_REG (SImode, 1); + + insn = emit_insn (gen_movsi (r0, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL); + + emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7))); + + /* ??? The CFA changes here, which may cause GDB to conclude that it + has entered a different function. 
That said, the unwind info is + correct, individually, before and after this instruction because + we've described the save of SP, which will override the default + handling of SP as restoring from the CFA. */ + emit_insn (gen_movsi (stack_pointer_rtx, r1)); + } + + /* For APCS frames, if IP register is clobbered + when creating frame, save that register in a special + way. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + if (IS_INTERRUPT (func_type)) + { + /* Interrupt functions must not corrupt any registers. + Creating a frame pointer however, corrupts the IP + register, so we must push it first. */ + emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM); + + /* Do not set RTX_FRAME_RELATED_P on this insn. + The dwarf stack unwinding code only wants to see one + stack decrement per function, and this is not it. If + this instruction is labeled as being part of the frame + creation sequence then dwarf2out_frame_debug_expr will + die when it encounters the assignment of IP to FP + later on, since the use of SP here establishes SP as + the CFA register and not IP. + + Anyway this instruction is not really part of the stack + frame creation although it is part of the prologue. */ + } + else if (IS_NESTED (func_type)) + { + /* The static chain register is the same as the IP register + used as a scratch register during stack frame creation. + To get around this need to find somewhere to store IP + whilst the frame is being created. We try the following + places in order: + + 1. The last argument register r3 if it is available. + 2. A slot on the stack above the frame if there are no + arguments to push onto the stack. + 3. Register r3 again, after pushing the argument registers + onto the stack, if this is a varargs function. + 4. The last slot on the stack created for the arguments to + push, if this isn't a varargs function. + + Note - we only need to tell the dwarf2 backend about the SP + adjustment in the second variant; the static chain register + doesn't need to be unwound, as it doesn't contain a value + inherited from the caller. */ + + if (!arm_r3_live_at_start_p ()) + insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + else if (args_to_push == 0) + { + rtx addr, dwarf; + + gcc_assert(arm_compute_static_chain_stack_bytes() == 4); + saved_regs += 4; + + addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx); + fp_offset = 4; + + /* Just tell the dwarf backend that we adjusted SP. */ + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + /* Store the args on the stack. */ + if (cfun->machine->uses_anonymous_args) + { + insn + = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf, + (0xf0 >> (args_to_push / 4)) & 0xf); + emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + saved_pretend_args = 1; + } + else + { + rtx addr, dwarf; + + if (args_to_push == 4) + addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + else + addr + = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -args_to_push)); + + insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx); + + /* Just tell the dwarf backend that we adjusted SP. 
*/ + dwarf + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -args_to_push)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + + RTX_FRAME_RELATED_P (insn) = 1; + fp_offset = args_to_push; + args_to_push = 0; + } + } + + insn = emit_set_insn (ip_rtx, + plus_constant (Pmode, stack_pointer_rtx, + fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (args_to_push) + { + /* Push the argument registers, or reserve space for them. */ + if (cfun->machine->uses_anonymous_args) + insn = emit_multi_reg_push + ((0xf0 >> (args_to_push / 4)) & 0xf, + (0xf0 >> (args_to_push / 4)) & 0xf); + else + insn = emit_insn + (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- args_to_push))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If this is an interrupt service routine, and the link register + is going to be pushed, and we're not generating an extra + push of IP (needed when a frame is needed and the frame layout is APCS), + subtracting four from LR now will mean that the function return + can be done with a single instruction. */ + if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ) + && (live_regs_mask & (1 << LR_REGNUM)) != 0 + && !(frame_pointer_needed && TARGET_APCS_FRAME) + && TARGET_ARM) + { + rtx lr = gen_rtx_REG (SImode, LR_REGNUM); + + emit_set_insn (lr, plus_constant (SImode, lr, -4)); + } + + if (live_regs_mask) + { + unsigned long dwarf_regs_mask = live_regs_mask; + + saved_regs += bit_count (live_regs_mask) * 4; + if (optimize_size && !frame_pointer_needed + && saved_regs == offsets->saved_regs - offsets->saved_args) + { + /* If no coprocessor registers are being pushed and we don't have + to worry about a frame pointer then push extra registers to + create the stack frame. This is done in a way that does not + alter the frame layout, so it is independent of the epilogue. */ + int n; + int frame; + n = 0; + while (n < 8 && (live_regs_mask & (1 << n)) == 0) + n++; + frame = offsets->outgoing_args - (offsets->saved_args + saved_regs); + if (frame && n * 4 >= frame) + { + n = frame / 4; + live_regs_mask |= (1 << n) - 1; + saved_regs += frame; + } + } + + if (TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + gcc_checking_assert (live_regs_mask == dwarf_regs_mask); + if (TARGET_THUMB2) + thumb2_emit_strd_push (live_regs_mask); + else if (TARGET_ARM + && !TARGET_APCS_FRAME + && !IS_INTERRUPT (func_type)) + arm_emit_strd_push (live_regs_mask); + else + { + insn = emit_multi_reg_push (live_regs_mask, live_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + { + insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (! IS_VOLATILE (func_type)) + saved_regs += arm_save_coproc_regs (); + + if (frame_pointer_needed && TARGET_ARM) + { + /* Create the new frame pointer. */ + if (TARGET_APCS_FRAME) + { + insn = GEN_INT (-(4 + args_to_push + fp_offset)); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (IS_NESTED (func_type)) + { + /* Recover the static chain register. */ + if (!arm_r3_live_at_start_p () || saved_pretend_args) + insn = gen_rtx_REG (SImode, 3); + else + { + insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4); + insn = gen_frame_mem (SImode, insn); + } + emit_set_insn (ip_rtx, insn); + /* Add a USE to stop propagate_one_insn() from barfing. 
*/ + emit_insn (gen_force_register_use (ip_rtx)); + } + } + else + { + insn = GEN_INT (saved_regs - 4); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (flag_stack_usage_info) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + if (offsets->outgoing_args != offsets->saved_args + saved_regs) + { + /* This add can produce multiple insns for a large constant, so we + need to get tricky. */ + rtx last = get_last_insn (); + + amount = GEN_INT (offsets->saved_args + saved_regs + - offsets->outgoing_args); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + amount)); + do + { + last = last ? NEXT_INSN (last) : get_insns (); + RTX_FRAME_RELATED_P (last) = 1; + } + while (last != insn); + + /* If the frame pointer is needed, emit a special barrier that + will prevent the scheduler from moving stores to the frame + before the stack adjustment. */ + if (frame_pointer_needed) + insn = emit_insn (gen_stack_tie (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + + + if (frame_pointer_needed && TARGET_THUMB2) + thumb_set_frame_pointer (offsets); + + if (flag_pic && arm_pic_register != INVALID_REGNUM) + { + unsigned long mask; + + mask = live_regs_mask; + mask &= THUMB2_WORK_REGS; + if (!IS_NESTED (func_type)) + mask |= (1 << IP_REGNUM); + arm_load_pic_register (mask); + } + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + /* If the link register is being kept alive, with the return address in it, + then make sure that it does not get reused by the ce2 pass. */ + if ((live_regs_mask & (1 << LR_REGNUM)) == 0) + cfun->machine->lr_save_eliminated = 1; +} + +/* Print condition code to STREAM. Helper function for arm_print_operand. */ +static void +arm_print_condition (FILE *stream) +{ + if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4) + { + /* Branch conversion is not implemented for Thumb-2. */ + if (TARGET_THUMB) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + if (current_insn_predicate != NULL) + { + output_operand_lossage + ("predicated instruction in conditional sequence"); + return; + } + + fputs (arm_condition_codes[arm_current_cc], stream); + } + else if (current_insn_predicate) + { + enum arm_cond_code code; + + if (TARGET_THUMB1) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + + code = get_arm_condition_code (current_insn_predicate); + fputs (arm_condition_codes[code], stream); + } +} + + +/* If CODE is 'd', then the X is a condition operand and the instruction + should only be executed if the condition is true. 
+ if CODE is 'D', then the X is a condition operand and the instruction + should only be executed if the condition is false: however, if the mode + of the comparison is CCFPEmode, then always execute the instruction -- we + do this because in these circumstances !GE does not necessarily imply LT; + in these cases the instruction pattern will take care to make sure that + an instruction containing %d will follow, thereby undoing the effects of + doing this instruction unconditionally. + If CODE is 'N' then X is a floating point operand that must be negated + before output. + If CODE is 'B' then output a bitwise inverted value of X (a const int). + If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ +static void +arm_print_operand (FILE *stream, rtx x, int code) +{ + switch (code) + { + case '@': + fputs (ASM_COMMENT_START, stream); + return; + + case '_': + fputs (user_label_prefix, stream); + return; + + case '|': + fputs (REGISTER_PREFIX, stream); + return; + + case '?': + arm_print_condition (stream); + return; + + case '(': + /* Nothing in unified syntax, otherwise the current condition code. */ + if (!TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case ')': + /* The current condition code in unified syntax, otherwise nothing. */ + if (TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case '.': + /* The current condition code for a condition code setting instruction. + Preceded by 's' in unified syntax, otherwise followed by 's'. */ + if (TARGET_UNIFIED_ASM) + { + fputc('s', stream); + arm_print_condition (stream); + } + else + { + arm_print_condition (stream); + fputc('s', stream); + } + return; + + case '!': + /* If the instruction is conditionally executed then print + the current condition code, otherwise print 's'. */ + gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM); + if (current_insn_predicate) + arm_print_condition (stream); + else + fputc('s', stream); + break; + + /* %# is a "break" sequence. It doesn't output anything, but is used to + separate e.g. operand numbers from following text, if that text consists + of further digits which we don't want to be part of the operand + number. */ + case '#': + return; + + case 'N': + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + r = real_value_negate (&r); + fprintf (stream, "%s", fp_const_from_val (&r)); + } + return; + + /* An integer or symbol address without a preceding # sign. */ + case 'c': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) + { + output_addr_const (stream, x); + break; + } + /* Fall through. */ + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + return; + + /* An integer that we want to print in HEX. */ + case 'x': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); + break; + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + return; + + case 'B': + if (CONST_INT_P (x)) + { + HOST_WIDE_INT val; + val = ARM_SIGN_EXTEND (~INTVAL (x)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val); + } + else + { + putc ('~', stream); + output_addr_const (stream, x); + } + return; + + case 'L': + /* The low 16 bits of an immediate constant. 
*/ + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); + return; + + case 'i': + fprintf (stream, "%s", arithmetic_instr (x, 1)); + return; + + case 'I': + fprintf (stream, "%s", arithmetic_instr (x, 0)); + return; + + case 'S': + { + HOST_WIDE_INT val; + const char *shift; + + shift = shift_op (x, &val); + + if (shift) + { + fprintf (stream, ", %s ", shift); + if (val == -1) + arm_print_operand (stream, XEXP (x, 1), 0); + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } + } + return; + + /* An explanation of the 'Q', 'R' and 'H' register operands: + + In a pair of registers containing a DI or DF value the 'Q' + operand returns the register number of the register containing + the least significant part of the value. The 'R' operand returns + the register number of the register containing the most + significant part of the value. + + The 'H' operand returns the higher of the two register numbers. + On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the + same as the 'Q' operand, since the most significant part of the + value is held in the lower number register. The reverse is true + on systems where WORDS_BIG_ENDIAN is false. + + The purpose of these operands is to distinguish between cases + where the endian-ness of the values is important (for example + when they are added together), and cases where the endian-ness + is irrelevant, but the order of register operations is important. + For example when loading a value from memory into a register + pair, the endian-ness does not matter. Provided that the value + from the lower memory address is put into the lower numbered + register, and the value from the higher address is put into the + higher numbered register, the load will work regardless of whether + the value being loaded is big-wordian or little-wordian. The + order of the two register loads can matter however, if the address + of the memory location is actually held in one of the registers + being overwritten by the load. + + The 'Q' and 'R' constraints are also available for 64-bit + constants. */ + case 'Q': + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + { + rtx part = gen_lowpart (SImode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); + return; + + case 'R': + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + { + enum machine_mode mode = GET_MODE (x); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + part = gen_highpart_mode (SImode, mode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)); + return; + + case 'H': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + 1); + return; + + case 'J': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 
3 : 2)); + return; + + case 'K': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3)); + return; + + case 'm': + asm_fprintf (stream, "%r", + REG_P (XEXP (x, 0)) + ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0))); + return; + + case 'M': + asm_fprintf (stream, "{%r-%r}", + REGNO (x), + REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1); + return; + + /* Like 'M', but writing doubleword vector registers, for use by Neon + insns. */ + case 'h': + { + int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2; + int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2; + if (numregs == 1) + asm_fprintf (stream, "{d%d}", regno); + else + asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1); + } + return; + + case 'd': + /* CONST_TRUE_RTX means always -- that's the default. */ + if (x == const_true_rtx) + return; + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[get_arm_condition_code (x)], + stream); + return; + + case 'D': + /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever + want to do that. */ + if (x == const_true_rtx) + { + output_operand_lossage ("instruction never executed"); + return; + } + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE + (get_arm_condition_code (x))], + stream); + return; + + case 's': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + /* Former Maverick support, removed after GCC-4.7. */ + output_operand_lossage ("obsolete Maverick format code '%c'", code); + return; + + case 'U': + if (!REG_P (x) + || REGNO (x) < FIRST_IWMMXT_GR_REGNUM + || REGNO (x) > LAST_IWMMXT_GR_REGNUM) + /* Bad value for wCG register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM); + return; + + /* Print an iWMMXt control register name. */ + case 'w': + if (!CONST_INT_P (x) + || INTVAL (x) < 0 + || INTVAL (x) >= 16) + /* Bad value for wC register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + { + static const char * wc_reg_names [16] = + { + "wCID", "wCon", "wCSSF", "wCASF", + "wC4", "wC5", "wC6", "wC7", + "wCGR0", "wCGR1", "wCGR2", "wCGR3", + "wC12", "wC13", "wC14", "wC15" + }; + + fputs (wc_reg_names [INTVAL (x)], stream); + } + return; + + /* Print the high single-precision register of a VFP double-precision + register. */ + case 'p': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); + } + return; + + /* Print a VFP/Neon double precision or quad precision register name. */ + case 'P': + case 'q': + { + int mode = GET_MODE (x); + int is_quad = (code == 'q'); + int regno; + + if (GET_MODE_SIZE (mode) != (is_quad ? 
16 : 8)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (!REG_P (x) + || !IS_VFP_REGNUM (REGNO (x))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno)) + || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "%c%d", is_quad ? 'q' : 'd', + (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1)); + } + return; + + /* These two codes print the low/high doubleword register of a Neon quad + register, respectively. For pair-structure types, can also print + low/high quadword registers. */ + case 'e': + case 'f': + { + int mode = GET_MODE (x); + int regno; + + if ((GET_MODE_SIZE (mode) != 16 + && GET_MODE_SIZE (mode) != 32) || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!NEON_REGNO_OK_FOR_QUAD (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (GET_MODE_SIZE (mode) == 16) + fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1) + + (code == 'f' ? 1 : 0)); + else + fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2) + + (code == 'f' ? 1 : 0)); + } + return; + + /* Print a VFPv3 floating-point constant, represented as an integer + index. */ + case 'G': + { + int index = vfp3_const_double_index (x); + gcc_assert (index != -1); + fprintf (stream, "%d", index); + } + return; + + /* Print bits representing opcode features for Neon. + + Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed + and polynomials as unsigned. + + Bit 1 is 1 for floats and polynomials, 0 for ordinary integers. + + Bit 2 is 1 for rounding functions, 0 otherwise. */ + + /* Identify the type as 's', 'u', 'p' or 'f'. */ + case 'T': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("uspf"[bits & 3], stream); + } + return; + + /* Likewise, but signed and unsigned integers are both 'i'. */ + case 'F': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("iipf"[bits & 3], stream); + } + return; + + /* As for 'T', but emit 'u' instead of 'p'. */ + case 't': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("usuf"[bits & 3], stream); + } + return; + + /* Bit 2: rounding (vs none). */ + case 'O': + { + HOST_WIDE_INT bits = INTVAL (x); + fputs ((bits & 4) != 0 ? "r" : "", stream); + } + return; + + /* Memory operand for vld1/vst1 instruction. */ + case 'A': + { + rtx addr; + bool postinc = FALSE; + unsigned align, memsize, align_bits; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC) + { + postinc = 1; + addr = XEXP (addr, 0); + } + asm_fprintf (stream, "[%r", REGNO (addr)); + + /* We know the alignment of this access, so we can emit a hint in the + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; + memsize = MEM_SIZE (x); + + /* Only certain alignment specifiers are supported by the hardware. 
*/ + if (memsize == 32 && (align % 32) == 0) + align_bits = 256; + else if ((memsize == 16 || memsize == 32) && (align % 16) == 0) + align_bits = 128; + else if (memsize >= 8 && (align % 8) == 0) + align_bits = 64; + else + align_bits = 0; + + if (align_bits != 0) + asm_fprintf (stream, ":%d", align_bits); + + asm_fprintf (stream, "]"); + + if (postinc) + fputs("!", stream); + } + return; + + case 'C': + { + rtx addr; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + gcc_assert (REG_P (addr)); + asm_fprintf (stream, "[%r]", REGNO (addr)); + } + return; + + /* Translate an S register number into a D register number and element index. */ + case 'y': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno / 2, regno % 2); + } + return; + + case 'v': + gcc_assert (CONST_DOUBLE_P (x)); + int result; + result = vfp3_const_double_for_fract_bits (x); + if (result == 0) + result = vfp3_const_double_for_bits (x); + fprintf (stream, "#%d", result); + return; + + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + + default: + if (x == 0) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + asm_fprintf (stream, "%r", REGNO (x)); + break; + + case MEM: + output_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + break; + + case CONST_DOUBLE: + if (TARGET_NEON) + { + char fpstr[20]; + real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), + sizeof (fpstr), 0, 1); + fprintf (stream, "#%s", fpstr); + } + else + fprintf (stream, "#%s", fp_immediate_constant (x)); + break; + + default: + gcc_assert (GET_CODE (x) != NEG); + fputc ('#', stream); + if (GET_CODE (x) == HIGH) + { + fputs (":lower16:", stream); + x = XEXP (x, 0); + } + + output_addr_const (stream, x); + break; + } + } +} + +/* Target hook for printing a memory address. */ +static void +arm_print_operand_address (FILE *stream, rtx x) +{ + if (TARGET_32BIT) + { + int is_minus = GET_CODE (x) == MINUS; + + if (REG_P (x)) + asm_fprintf (stream, "[%r]", REGNO (x)); + else if (GET_CODE (x) == PLUS || is_minus) + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + HOST_WIDE_INT offset = 0; + if (!REG_P (base) + || (REG_P (index) && REGNO (index) == SP_REGNUM)) + { + /* Ensure that BASE is a register. */ + /* (one of them must be). */ + /* Also ensure the SP is not used as an index register. */ + rtx temp = base; + base = index; + index = temp; + } + switch (GET_CODE (index)) + { + case CONST_INT: + offset = INTVAL (index); + if (is_minus) + offset = -offset; + asm_fprintf (stream, "[%r, #%wd]", + REGNO (base), offset); + break; + + case REG: + asm_fprintf (stream, "[%r, %s%r]", + REGNO (base), is_minus ? 
"-" : "", + REGNO (index)); + break; + + case MULT: + case ASHIFTRT: + case LSHIFTRT: + case ASHIFT: + case ROTATERT: + { + asm_fprintf (stream, "[%r, %s%r", + REGNO (base), is_minus ? "-" : "", + REGNO (XEXP (index, 0))); + arm_print_operand (stream, index, 'S'); + fputs ("]", stream); + break; + } + + default: + gcc_unreachable (); + } + } + else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC + || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC) + { + extern enum machine_mode output_memory_reference_mode; + + gcc_assert (REG_P (XEXP (x, 0))); + + if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC) + asm_fprintf (stream, "[%r, #%s%d]!", + REGNO (XEXP (x, 0)), + GET_CODE (x) == PRE_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + else + asm_fprintf (stream, "[%r], #%s%d", + REGNO (XEXP (x, 0)), + GET_CODE (x) == POST_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + } + else if (GET_CODE (x) == PRE_MODIFY) + { + asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0))); + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + asm_fprintf (stream, "#%wd]!", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r]!", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else if (GET_CODE (x) == POST_MODIFY) + { + asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0))); + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + asm_fprintf (stream, "#%wd", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else output_addr_const (stream, x); + } + else + { + if (REG_P (x)) + asm_fprintf (stream, "[%r]", REGNO (x)); + else if (GET_CODE (x) == POST_INC) + asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0))); + else if (GET_CODE (x) == PLUS) + { + gcc_assert (REG_P (XEXP (x, 0))); + if (CONST_INT_P (XEXP (x, 1))) + asm_fprintf (stream, "[%r, #%wd]", + REGNO (XEXP (x, 0)), + INTVAL (XEXP (x, 1))); + else + asm_fprintf (stream, "[%r, %r]", + REGNO (XEXP (x, 0)), + REGNO (XEXP (x, 1))); + } + else + output_addr_const (stream, x); + } +} + +/* Target hook for indicating whether a punctuation character for + TARGET_PRINT_OPERAND is valid. */ +static bool +arm_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '@' || code == '|' || code == '.' + || code == '(' || code == ')' || code == '#' + || (TARGET_32BIT && (code == '?')) + || (TARGET_THUMB2 && (code == '!')) + || (TARGET_THUMB && (code == '_'))); +} + +/* Target hook for assembling integer objects. The ARM version needs to + handle word-sized values specially. */ +static bool +arm_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + enum machine_mode mode; + + if (size == UNITS_PER_WORD && aligned_p) + { + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + + /* Mark symbols as position independent. We only do this in the + .text segment, not in the .data segment. */ + if (NEED_GOT_RELOC && flag_pic && making_const_table && + (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)) + { + /* See legitimize_pic_address for an explanation of the + TARGET_VXWORKS_RTP check. 
*/ + if (!arm_pic_data_is_text_relative + || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x))) + fputs ("(GOT)", asm_out_file); + else + fputs ("(GOTOFF)", asm_out_file); + } + fputc ('\n', asm_out_file); + return true; + } + + mode = GET_MODE (x); + + if (arm_vector_mode_supported_p (mode)) + { + int i, units; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + units = CONST_VECTOR_NUNITS (x); + size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + assemble_integer + (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); + } + else + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + REAL_VALUE_TYPE rval; + + REAL_VALUE_FROM_CONST_DOUBLE (rval, elt); + + assemble_real + (rval, GET_MODE_INNER (mode), + i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT); + } + + return true; + } + + return default_assemble_integer (x, size, aligned_p); +} + +static void +arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor) +{ + section *s; + + if (!TARGET_AAPCS_BASED) + { + (is_ctor ? + default_named_section_asm_out_constructor + : default_named_section_asm_out_destructor) (symbol, priority); + return; + } + + /* Put these in the .init_array section, using a special relocation. */ + if (priority != DEFAULT_INIT_PRIORITY) + { + char buf[18]; + sprintf (buf, "%s.%.5u", + is_ctor ? ".init_array" : ".fini_array", + priority); + s = get_section (buf, SECTION_WRITE, NULL_TREE); + } + else if (is_ctor) + s = ctors_section; + else + s = dtors_section; + + switch_to_section (s); + assemble_align (POINTER_SIZE); + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, symbol); + fputs ("(target1)\n", asm_out_file); +} + +/* Add a function to the list of static constructors. */ + +static void +arm_elf_asm_constructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true); +} + +/* Add a function to the list of static destructors. */ + +static void +arm_elf_asm_destructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false); +} + +/* A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: make ASM_OUTPUT_OPCODE not output this instruction + 2: make ASM_OUTPUT_OPCODE not output this instruction + 3: make instructions conditional + 4: make instructions conditional + + State transitions (state->state by whom under condition): + 0 -> 1 final_prescan_insn if the `target' is a label + 0 -> 2 final_prescan_insn if the `target' is an unconditional branch + 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch + 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch + 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to arm_target_label). + 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached + (the target insn is arm_target_insn). + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. 
+ + XXX In case the `target' is an unconditional branch, this conditionalising + of the instructions always reduces code size, but not always execution + time. But then, I want to reduce the code size to somewhere near what + /bin/cc produces. */ + +/* In addition to this, state is maintained for Thumb-2 COND_EXEC + instructions. When a COND_EXEC instruction is seen the subsequent + instructions are scanned so that multiple conditional instructions can be + combined into a single IT block. arm_condexec_count and arm_condexec_mask + specify the length and true/false mask for the IT block. These will be + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + +/* Returns the index of the ARM condition code string in + `arm_condition_codes', or ARM_NV if the comparison is invalid. + COMPARISON should be an rtx like `(eq (...) (...))'. */ + +enum arm_cond_code +maybe_get_arm_condition_code (rtx comparison) +{ + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; + enum rtx_code comp_code = GET_CODE (comparison); + + if (GET_MODE_CLASS (mode) != MODE_CC) + mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0), + XEXP (comparison, 1)); + + switch (mode) + { + case CC_DNEmode: code = ARM_NE; goto dominance; + case CC_DEQmode: code = ARM_EQ; goto dominance; + case CC_DGEmode: code = ARM_GE; goto dominance; + case CC_DGTmode: code = ARM_GT; goto dominance; + case CC_DLEmode: code = ARM_LE; goto dominance; + case CC_DLTmode: code = ARM_LT; goto dominance; + case CC_DGEUmode: code = ARM_CS; goto dominance; + case CC_DGTUmode: code = ARM_HI; goto dominance; + case CC_DLEUmode: code = ARM_LS; goto dominance; + case CC_DLTUmode: code = ARM_CC; + + dominance: + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); + if (comp_code == NE) + return code; + return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; + default: return ARM_NV; + } + + case CC_Zmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + default: return ARM_NV; + } + + case CC_Nmode: + switch (comp_code) + { + case NE: return ARM_MI; + case EQ: return ARM_PL; + default: return ARM_NV; + } + + case CCFPEmode: + case CCFPmode: + /* We can handle all cases except UNEQ and LTGT. */ + switch (comp_code) + { + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LS; + case LT: return ARM_MI; + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case ORDERED: return ARM_VC; + case UNORDERED: return ARM_VS; + case UNLT: return ARM_LT; + case UNLE: return ARM_LE; + case UNGT: return ARM_HI; + case UNGE: return ARM_PL; + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. 
*/ + default: return ARM_NV; + } + + case CC_SWPmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_LE; + case GT: return ARM_LT; + case LE: return ARM_GE; + case LT: return ARM_GT; + case GEU: return ARM_LS; + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; + default: return ARM_NV; + } + + case CC_Cmode: + switch (comp_code) + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; + default: return ARM_NV; + } + + case CC_CZmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + case CC_NCVmode: + switch (comp_code) + { + case GE: return ARM_GE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + case CCmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + default: gcc_unreachable (); + } +} + +/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ +static enum arm_cond_code +get_arm_condition_code (rtx comparison) +{ + enum arm_cond_code code = maybe_get_arm_condition_code (comparison); + gcc_assert (code != ARM_NV); + return code; +} + +/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ +void +thumb2_final_prescan_insn (rtx insn) +{ + rtx first_insn = insn; + rtx body = PATTERN (insn); + rtx predicate; + enum arm_cond_code code; + int n; + int mask; + int max; + + /* max_insns_skipped in the tune was already taken into account in the + cost model of ifcvt pass when generating COND_EXEC insns. At this stage + just emit the IT blocks as we can. It does not make sense to split + the IT blocks. */ + max = MAX_INSN_PER_IT_BLOCK; + + /* Remove the previous insn from the count of insns to be output. */ + if (arm_condexec_count) + arm_condexec_count--; + + /* Nothing to do if we are already inside a conditional block. */ + if (arm_condexec_count) + return; + + if (GET_CODE (body) != COND_EXEC) + return; + + /* Conditional jumps are implemented directly. */ + if (JUMP_P (insn)) + return; + + predicate = COND_EXEC_TEST (body); + arm_current_cc = get_arm_condition_code (predicate); + + n = get_attr_ce_count (insn); + arm_condexec_count = 1; + arm_condexec_mask = (1 << n) - 1; + arm_condexec_masklen = n; + /* See if subsequent instructions can be combined into the same block. */ + for (;;) + { + insn = next_nonnote_insn (insn); + + /* Jumping into the middle of an IT block is illegal, so a label or + barrier terminates the block. */ + if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn)) + break; + + body = PATTERN (insn); + /* USE and CLOBBER aren't really insns, so just skip them. */ + if (GET_CODE (body) == USE + || GET_CODE (body) == CLOBBER) + continue; + + /* ??? Recognize conditional jumps, and combine them with IT blocks. */ + if (GET_CODE (body) != COND_EXEC) + break; + /* Maximum number of conditionally executed instructions in a block. 
*/ + n = get_attr_ce_count (insn); + if (arm_condexec_masklen + n > max) + break; + + predicate = COND_EXEC_TEST (body); + code = get_arm_condition_code (predicate); + mask = (1 << n) - 1; + if (arm_current_cc == code) + arm_condexec_mask |= (mask << arm_condexec_masklen); + else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code)) + break; + + arm_condexec_count++; + arm_condexec_masklen += n; + + /* A jump must be the last instruction in a conditional block. */ + if (JUMP_P (insn)) + break; + } + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call). */ + extract_constrain_insn_cached (first_insn); +} + +void +arm_final_prescan_insn (rtx insn) +{ + /* BODY will hold the body of INSN. */ + rtx body = PATTERN (insn); + + /* This will be 1 if trying to repeat the trick, and things need to be + reversed if it appears to fail. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + enum rtx_code return_code = UNKNOWN; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (arm_ccfsm_state == 4) + { + if (insn == arm_target_insn) + { + arm_target_insn = NULL; + arm_ccfsm_state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. */ + if (arm_ccfsm_state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (BARRIER_P (start_insn)) + { + /* XXX Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (LABEL_P (start_insn) + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (ANY_RETURN_P (body)) + { + start_insn = next_nonnote_insn (start_insn); + if (BARRIER_P (start_insn)) + start_insn = next_nonnote_insn (start_insn); + if (LABEL_P (start_insn) + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + return_code = GET_CODE (body); + } + else + return; + } + else + return; + } + + gcc_assert (!arm_ccfsm_state || reverse); + if (!JUMP_P (insn)) + return; + + /* This jump might be paralleled with a clobber of the condition codes + the jump should always come first */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped; + int fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1))) + { + seeking_return = 1; + return_code = GET_CODE (XEXP (SET_SRC (body), 1)); + } + else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2))) + { + seeking_return = 1; + return_code = GET_CODE (XEXP (SET_SRC (body), 2)); + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0; + !fail && !succeed && insns_skipped++ < max_insns_skipped;) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case CALL_INSN: + /* The AAPCS says that conditional calls should not be + used since they make interworking inefficient (the + linker can't transform BL into BLX). That's + only a problem if the machine has BLX. */ + if (arm_arch5) + { + fail = TRUE; + break; + } + + /* Succeed if the following insn is the target label, or + if the following two insns are a barrier and the + target label. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && BARRIER_P (this_insn)) + this_insn = next_nonnote_insn (this_insn); + + if (this_insn && this_insn == label + && insns_skipped < max_insns_skipped) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* XXX Probably, the tests for SET and the PC are + unnecessary. */ + + scanbody = PATTERN (this_insn); + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + } + /* Fail if a conditional return is undesirable (e.g. on a + StrongARM), but still allow this if optimizing for size. */ + else if (GET_CODE (scanbody) == return_code + && !use_return_insn (TRUE, NULL) + && !optimize_size) + fail = TRUE; + else if (GET_CODE (scanbody) == return_code) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + switch (get_attr_conds (this_insn)) + { + case CONDS_NOCOND: + break; + default: + fail = TRUE; + break; + } + } + else + fail = TRUE; /* Unrecognized jump (e.g. epilogue). 
*/ + + break; + + case INSN: + /* Instructions using or affecting the condition codes make it + fail. */ + scanbody = PATTERN (this_insn); + if (!(GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + || get_attr_conds (this_insn) != CONDS_NOCOND) + fail = TRUE; + break; + + default: + break; + } + } + if (succeed) + { + if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse)) + arm_target_label = CODE_LABEL_NUMBER (label); + else + { + gcc_assert (seeking_return || arm_ccfsm_state == 2); + + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + gcc_assert (!this_insn + || (!BARRIER_P (this_insn) + && !LABEL_P (this_insn))); + } + if (!this_insn) + { + /* Oh, dear! We ran off the end; give up. */ + extract_constrain_insn_cached (insn); + arm_ccfsm_state = 0; + arm_target_insn = NULL; + return; + } + arm_target_insn = this_insn; + } + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0)); + + if (reverse || then_not_else) + arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); + } + + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call). */ + extract_constrain_insn_cached (insn); + } +} + +/* Output IT instructions. */ +void +thumb2_asm_output_opcode (FILE * stream) +{ + char buff[5]; + int n; + + if (arm_condexec_mask) + { + for (n = 0; n < arm_condexec_masklen; n++) + buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e'; + buff[n] = 0; + asm_fprintf(stream, "i%s\t%s\n\t", buff, + arm_condition_codes[arm_current_cc]); + arm_condexec_mask = 0; + } +} + +/* Returns true if REGNO is a valid register + for holding a quantity of type MODE. */ +int +arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return (regno == CC_REGNUM + || (TARGET_HARD_FLOAT && TARGET_VFP + && regno == VFPCC_REGNUM)); + + if (TARGET_THUMB1) + /* For the Thumb we only allow values bigger than SImode in + registers 0 - 6, so that there is always a second low + register available to hold the upper part of the value. + We probably ought to ensure that the register is the + start of an even numbered register pair. */ + return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM); + + if (TARGET_HARD_FLOAT && TARGET_VFP + && IS_VFP_REGNUM (regno)) + { + if (mode == SFmode || mode == SImode) + return VFP_REGNO_OK_FOR_SINGLE (regno); + + if (mode == DFmode) + return VFP_REGNO_OK_FOR_DOUBLE (regno); + + /* VFP registers can hold HFmode values, but there is no point in + putting them there unless we have hardware conversion insns.
*/ + if (mode == HFmode) + return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); + + if (TARGET_NEON) + return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) + || (VALID_NEON_QREG_MODE (mode) + && NEON_REGNO_OK_FOR_QUAD (regno)) + || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2)) + || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3)) + || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4)) + || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6)) + || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)); + + return FALSE; + } + + if (TARGET_REALLY_IWMMXT) + { + if (IS_IWMMXT_GR_REGNUM (regno)) + return mode == SImode; + + if (IS_IWMMXT_REGNUM (regno)) + return VALID_IWMMXT_REG_MODE (mode); + } + + /* We allow almost any value to be stored in the general registers. + Restrict doubleword quantities to even register pairs so that we can + use ldrd. Do not allow very large Neon structure opaque modes in + general registers; they would use too many. */ + if (regno <= LAST_ARM_REGNUM) + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) + && ARM_NUM_REGS (mode) <= 4; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + /* We only allow integers in the fake hard registers. */ + return GET_MODE_CLASS (mode) == MODE_INT; + + return FALSE; +} + +/* Implement MODES_TIEABLE_P. */ + +bool +arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +{ + if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) + return true; + + /* We specifically want to allow elements of "structure" modes to + be tieable to the structure. This more general condition allows + other rarer situations too. */ + if (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode1) + || VALID_NEON_QREG_MODE (mode1) + || VALID_NEON_STRUCT_MODE (mode1)) + && (VALID_NEON_DREG_MODE (mode2) + || VALID_NEON_QREG_MODE (mode2) + || VALID_NEON_STRUCT_MODE (mode2))) + return true; + + return false; +} + +/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are + not used in arm mode. */ + +enum reg_class +arm_regno_class (int regno) +{ + if (TARGET_THUMB1) + { + if (regno == STACK_POINTER_REGNUM) + return STACK_REG; + if (regno == CC_REGNUM) + return CC_REG; + if (regno < 8) + return LO_REGS; + return HI_REGS; + } + + if (TARGET_THUMB2 && regno < 8) + return LO_REGS; + + if ( regno <= LAST_ARM_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS; + + if (regno == CC_REGNUM || regno == VFPCC_REGNUM) + return TARGET_THUMB2 ? CC_REG : NO_REGS; + + if (IS_VFP_REGNUM (regno)) + { + if (regno <= D7_VFP_REGNUM) + return VFP_D0_D7_REGS; + else if (regno <= LAST_LO_VFP_REGNUM) + return VFP_LO_REGS; + else + return VFP_HI_REGS; + } + + if (IS_IWMMXT_REGNUM (regno)) + return IWMMXT_REGS; + + if (IS_IWMMXT_GR_REGNUM (regno)) + return IWMMXT_GR_REGS; + + return NO_REGS; +} + +/* Handle a special case when computing the offset + of an argument from the frame pointer. */ +int +arm_debugger_arg_offset (int value, rtx addr) +{ + rtx insn; + + /* We are only interested if dbxout_parms() failed to compute the offset. */ + if (value != 0) + return 0; + + /* We can only cope with the case where the address is held in a register. */ + if (!REG_P (addr)) + return 0; + + /* If we are using the frame pointer to point at the argument, then + an offset of 0 is correct. 
*/ + if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM) + return 0; + + /* If we are using the stack pointer to point at the + argument, then an offset of 0 is correct. */ + /* ??? Check this is consistent with thumb2 frame layout. */ + if ((TARGET_THUMB || !frame_pointer_needed) + && REGNO (addr) == SP_REGNUM) + return 0; + + /* Oh dear. The argument is pointed to by a register rather + than being held in a register, or being stored at a known + offset from the frame pointer. Since GDB only understands + those two kinds of argument we must translate the address + held in the register into an offset from the frame pointer. + We do this by searching through the insns for the function + looking to see where this register gets its value. If the + register is initialized from the frame pointer plus an offset + then we are in luck and we can continue, otherwise we give up. + + This code is exercised by producing debugging information + for a function with arguments like this: + + double func (double a, double b, int c, double d) {return d;} + + Without this code the stab for parameter 'd' will be set to + an offset of 0 from the frame pointer, rather than 8. */ + + /* The if() statement says: + + If the insn is a normal instruction + and if the insn is setting the value in a register + and if the register being set is the register holding the address of the argument + and if the address is computed by an addition + that involves adding to a register + which is the frame pointer + a constant integer + + then... */ + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if ( NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == SET + && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr) + && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS + && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0)) + && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM + && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)) + ) + { + value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1)); + + break; + } + } + + if (value == 0) + { + debug_rtx (addr); + warning (0, "unable to compute real location of stacked parameter"); + value = 8; /* XXX magic hack */ + } + + return value; +} + +typedef enum { + T_V8QI, + T_V4HI, + T_V4HF, + T_V2SI, + T_V2SF, + T_DI, + T_V16QI, + T_V8HI, + T_V4SI, + T_V4SF, + T_V2DI, + T_TI, + T_EI, + T_OI, + T_MAX /* Size of enum. Keep last.
*/ +} neon_builtin_type_mode; + +#define TYPE_MODE_BIT(X) (1 << (X)) + +#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ + | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \ + | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI)) +#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ + | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ + | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v4hf_UP T_V4HF +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI + +#define UP(X) X##_UP + +typedef enum { + NEON_BINOP, + NEON_TERNOP, + NEON_UNOP, + NEON_GETLANE, + NEON_SETLANE, + NEON_CREATE, + NEON_RINT, + NEON_DUP, + NEON_DUPLANE, + NEON_COMBINE, + NEON_SPLIT, + NEON_LANEMUL, + NEON_LANEMULL, + NEON_LANEMULH, + NEON_LANEMAC, + NEON_SCALARMUL, + NEON_SCALARMULL, + NEON_SCALARMULH, + NEON_SCALARMAC, + NEON_CONVERT, + NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW, + NEON_FIXCONV, + NEON_SELECT, + NEON_RESULTPAIR, + NEON_REINTERP, + NEON_VTBL, + NEON_VTBX, + NEON_LOAD1, + NEON_LOAD1LANE, + NEON_STORE1, + NEON_STORE1LANE, + NEON_LOADSTRUCT, + NEON_LOADSTRUCTLANE, + NEON_STORESTRUCT, + NEON_STORESTRUCTLANE, + NEON_LOGICBINOP, + NEON_SHIFTINSERT, + NEON_SHIFTIMM, + NEON_SHIFTACC +} neon_itype; + +typedef struct { + const char *name; + const neon_itype itype; + const neon_builtin_type_mode mode; + const enum insn_code code; + unsigned int fcode; +} neon_builtin_datum; + +#define CF(N,X) CODE_FOR_neon_##N##X + +#define VAR1(T, N, A) \ + {#N, NEON_##T, UP (A), CF (N, A), 0} +#define VAR2(T, N, A, B) \ + VAR1 (T, N, A), \ + {#N, NEON_##T, UP (B), CF (N, B), 0} +#define VAR3(T, N, A, B, C) \ + VAR2 (T, N, A, B), \ + {#N, NEON_##T, UP (C), CF (N, C), 0} +#define VAR4(T, N, A, B, C, D) \ + VAR3 (T, N, A, B, C), \ + {#N, NEON_##T, UP (D), CF (N, D), 0} +#define VAR5(T, N, A, B, C, D, E) \ + VAR4 (T, N, A, B, C, D), \ + {#N, NEON_##T, UP (E), CF (N, E), 0} +#define VAR6(T, N, A, B, C, D, E, F) \ + VAR5 (T, N, A, B, C, D, E), \ + {#N, NEON_##T, UP (F), CF (N, F), 0} +#define VAR7(T, N, A, B, C, D, E, F, G) \ + VAR6 (T, N, A, B, C, D, E, F), \ + {#N, NEON_##T, UP (G), CF (N, G), 0} +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, A, B, C, D, E, F, G), \ + {#N, NEON_##T, UP (H), CF (N, H), 0} +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, A, B, C, D, E, F, G, H), \ + {#N, NEON_##T, UP (I), CF (N, I), 0} +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ + {#N, NEON_##T, UP (J), CF (N, J), 0} + +/* The NEON builtin data can be found in arm_neon_builtins.def. + The mode entries in the following table correspond to the "key" type of the + instruction variant, i.e. equivalent to that which would be specified after + the assembler mnemonic, which usually refers to the last vector operand. + (Signed/unsigned/polynomial types are not differentiated between though, and + are all mapped onto the same mode for a given element size.) The modes + listed per instruction should be the same as those defined for that + instruction's pattern in neon.md. 
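+   For example (the entry here is only an illustration and need not appear
+   verbatim in arm_neon_builtins.def), a line such as
+   VAR2 (BINOP, vadd, v2sf, v4sf) expands through the VAR2, UP and CF macros
+   above into the two rows {"vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0}
+   and {"vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0}, which
+   arm_init_neon_builtins below registers as __builtin_neon_vaddv2sf and
+   __builtin_neon_vaddv4sf.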
*/ + +static neon_builtin_datum neon_builtin_data[] = +{ +#include "arm_neon_builtins.def" +}; + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + +#define CF(N,X) ARM_BUILTIN_NEON_##N##X +#define VAR1(T, N, A) \ + CF (N, A) +#define VAR2(T, N, A, B) \ + VAR1 (T, N, A), \ + CF (N, B) +#define VAR3(T, N, A, B, C) \ + VAR2 (T, N, A, B), \ + CF (N, C) +#define VAR4(T, N, A, B, C, D) \ + VAR3 (T, N, A, B, C), \ + CF (N, D) +#define VAR5(T, N, A, B, C, D, E) \ + VAR4 (T, N, A, B, C, D), \ + CF (N, E) +#define VAR6(T, N, A, B, C, D, E, F) \ + VAR5 (T, N, A, B, C, D, E), \ + CF (N, F) +#define VAR7(T, N, A, B, C, D, E, F, G) \ + VAR6 (T, N, A, B, C, D, E, F), \ + CF (N, G) +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, A, B, C, D, E, F, G), \ + CF (N, H) +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, A, B, C, D, E, F, G, H), \ + CF (N, I) +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ + CF (N, J) +enum arm_builtins +{ + ARM_BUILTIN_GETWCGR0, + ARM_BUILTIN_GETWCGR1, + ARM_BUILTIN_GETWCGR2, + ARM_BUILTIN_GETWCGR3, + + ARM_BUILTIN_SETWCGR0, + ARM_BUILTIN_SETWCGR1, + ARM_BUILTIN_SETWCGR2, + ARM_BUILTIN_SETWCGR3, + + ARM_BUILTIN_WZERO, + + ARM_BUILTIN_WAVG2BR, + ARM_BUILTIN_WAVG2HR, + ARM_BUILTIN_WAVG2B, + ARM_BUILTIN_WAVG2H, + + ARM_BUILTIN_WACCB, + ARM_BUILTIN_WACCH, + ARM_BUILTIN_WACCW, + + ARM_BUILTIN_WMACS, + ARM_BUILTIN_WMACSZ, + ARM_BUILTIN_WMACU, + ARM_BUILTIN_WMACUZ, + + ARM_BUILTIN_WSADB, + ARM_BUILTIN_WSADBZ, + ARM_BUILTIN_WSADH, + ARM_BUILTIN_WSADHZ, + + ARM_BUILTIN_WALIGNI, + ARM_BUILTIN_WALIGNR0, + ARM_BUILTIN_WALIGNR1, + ARM_BUILTIN_WALIGNR2, + ARM_BUILTIN_WALIGNR3, + + ARM_BUILTIN_TMIA, + ARM_BUILTIN_TMIAPH, + ARM_BUILTIN_TMIABB, + ARM_BUILTIN_TMIABT, + ARM_BUILTIN_TMIATB, + ARM_BUILTIN_TMIATT, + + ARM_BUILTIN_TMOVMSKB, + ARM_BUILTIN_TMOVMSKH, + ARM_BUILTIN_TMOVMSKW, + + ARM_BUILTIN_TBCSTB, + ARM_BUILTIN_TBCSTH, + ARM_BUILTIN_TBCSTW, + + ARM_BUILTIN_WMADDS, + ARM_BUILTIN_WMADDU, + + ARM_BUILTIN_WPACKHSS, + ARM_BUILTIN_WPACKWSS, + ARM_BUILTIN_WPACKDSS, + ARM_BUILTIN_WPACKHUS, + ARM_BUILTIN_WPACKWUS, + ARM_BUILTIN_WPACKDUS, + + ARM_BUILTIN_WADDB, + ARM_BUILTIN_WADDH, + ARM_BUILTIN_WADDW, + ARM_BUILTIN_WADDSSB, + ARM_BUILTIN_WADDSSH, + ARM_BUILTIN_WADDSSW, + ARM_BUILTIN_WADDUSB, + ARM_BUILTIN_WADDUSH, + ARM_BUILTIN_WADDUSW, + ARM_BUILTIN_WSUBB, + ARM_BUILTIN_WSUBH, + ARM_BUILTIN_WSUBW, + ARM_BUILTIN_WSUBSSB, + ARM_BUILTIN_WSUBSSH, + ARM_BUILTIN_WSUBSSW, + ARM_BUILTIN_WSUBUSB, + ARM_BUILTIN_WSUBUSH, + ARM_BUILTIN_WSUBUSW, + + ARM_BUILTIN_WAND, + ARM_BUILTIN_WANDN, + ARM_BUILTIN_WOR, + ARM_BUILTIN_WXOR, + + ARM_BUILTIN_WCMPEQB, + ARM_BUILTIN_WCMPEQH, + ARM_BUILTIN_WCMPEQW, + ARM_BUILTIN_WCMPGTUB, + ARM_BUILTIN_WCMPGTUH, + ARM_BUILTIN_WCMPGTUW, + ARM_BUILTIN_WCMPGTSB, + ARM_BUILTIN_WCMPGTSH, + ARM_BUILTIN_WCMPGTSW, + + ARM_BUILTIN_TEXTRMSB, + ARM_BUILTIN_TEXTRMSH, + ARM_BUILTIN_TEXTRMSW, + ARM_BUILTIN_TEXTRMUB, + ARM_BUILTIN_TEXTRMUH, + ARM_BUILTIN_TEXTRMUW, + ARM_BUILTIN_TINSRB, + ARM_BUILTIN_TINSRH, + ARM_BUILTIN_TINSRW, + + ARM_BUILTIN_WMAXSW, + ARM_BUILTIN_WMAXSH, + ARM_BUILTIN_WMAXSB, + ARM_BUILTIN_WMAXUW, + ARM_BUILTIN_WMAXUH, + ARM_BUILTIN_WMAXUB, + ARM_BUILTIN_WMINSW, + ARM_BUILTIN_WMINSH, + ARM_BUILTIN_WMINSB, + ARM_BUILTIN_WMINUW, + ARM_BUILTIN_WMINUH, + ARM_BUILTIN_WMINUB, + + ARM_BUILTIN_WMULUM, + ARM_BUILTIN_WMULSM, + ARM_BUILTIN_WMULUL, + + ARM_BUILTIN_PSADBH, + ARM_BUILTIN_WSHUFH, + + ARM_BUILTIN_WSLLH, + 
ARM_BUILTIN_WSLLW, + ARM_BUILTIN_WSLLD, + ARM_BUILTIN_WSRAH, + ARM_BUILTIN_WSRAW, + ARM_BUILTIN_WSRAD, + ARM_BUILTIN_WSRLH, + ARM_BUILTIN_WSRLW, + ARM_BUILTIN_WSRLD, + ARM_BUILTIN_WRORH, + ARM_BUILTIN_WRORW, + ARM_BUILTIN_WRORD, + ARM_BUILTIN_WSLLHI, + ARM_BUILTIN_WSLLWI, + ARM_BUILTIN_WSLLDI, + ARM_BUILTIN_WSRAHI, + ARM_BUILTIN_WSRAWI, + ARM_BUILTIN_WSRADI, + ARM_BUILTIN_WSRLHI, + ARM_BUILTIN_WSRLWI, + ARM_BUILTIN_WSRLDI, + ARM_BUILTIN_WRORHI, + ARM_BUILTIN_WRORWI, + ARM_BUILTIN_WRORDI, + + ARM_BUILTIN_WUNPCKIHB, + ARM_BUILTIN_WUNPCKIHH, + ARM_BUILTIN_WUNPCKIHW, + ARM_BUILTIN_WUNPCKILB, + ARM_BUILTIN_WUNPCKILH, + ARM_BUILTIN_WUNPCKILW, + + ARM_BUILTIN_WUNPCKEHSB, + ARM_BUILTIN_WUNPCKEHSH, + ARM_BUILTIN_WUNPCKEHSW, + ARM_BUILTIN_WUNPCKEHUB, + ARM_BUILTIN_WUNPCKEHUH, + ARM_BUILTIN_WUNPCKEHUW, + ARM_BUILTIN_WUNPCKELSB, + ARM_BUILTIN_WUNPCKELSH, + ARM_BUILTIN_WUNPCKELSW, + ARM_BUILTIN_WUNPCKELUB, + ARM_BUILTIN_WUNPCKELUH, + ARM_BUILTIN_WUNPCKELUW, + + ARM_BUILTIN_WABSB, + ARM_BUILTIN_WABSH, + ARM_BUILTIN_WABSW, + + ARM_BUILTIN_WADDSUBHX, + ARM_BUILTIN_WSUBADDHX, + + ARM_BUILTIN_WABSDIFFB, + ARM_BUILTIN_WABSDIFFH, + ARM_BUILTIN_WABSDIFFW, + + ARM_BUILTIN_WADDCH, + ARM_BUILTIN_WADDCW, + + ARM_BUILTIN_WAVG4, + ARM_BUILTIN_WAVG4R, + + ARM_BUILTIN_WMADDSX, + ARM_BUILTIN_WMADDUX, + + ARM_BUILTIN_WMADDSN, + ARM_BUILTIN_WMADDUN, + + ARM_BUILTIN_WMULWSM, + ARM_BUILTIN_WMULWUM, + + ARM_BUILTIN_WMULWSMR, + ARM_BUILTIN_WMULWUMR, + + ARM_BUILTIN_WMULWL, + + ARM_BUILTIN_WMULSMR, + ARM_BUILTIN_WMULUMR, + + ARM_BUILTIN_WQMULM, + ARM_BUILTIN_WQMULMR, + + ARM_BUILTIN_WQMULWM, + ARM_BUILTIN_WQMULWMR, + + ARM_BUILTIN_WADDBHUSM, + ARM_BUILTIN_WADDBHUSL, + + ARM_BUILTIN_WQMIABB, + ARM_BUILTIN_WQMIABT, + ARM_BUILTIN_WQMIATB, + ARM_BUILTIN_WQMIATT, + + ARM_BUILTIN_WQMIABBN, + ARM_BUILTIN_WQMIABTN, + ARM_BUILTIN_WQMIATBN, + ARM_BUILTIN_WQMIATTN, + + ARM_BUILTIN_WMIABB, + ARM_BUILTIN_WMIABT, + ARM_BUILTIN_WMIATB, + ARM_BUILTIN_WMIATT, + + ARM_BUILTIN_WMIABBN, + ARM_BUILTIN_WMIABTN, + ARM_BUILTIN_WMIATBN, + ARM_BUILTIN_WMIATTN, + + ARM_BUILTIN_WMIAWBB, + ARM_BUILTIN_WMIAWBT, + ARM_BUILTIN_WMIAWTB, + ARM_BUILTIN_WMIAWTT, + + ARM_BUILTIN_WMIAWBBN, + ARM_BUILTIN_WMIAWBTN, + ARM_BUILTIN_WMIAWTBN, + ARM_BUILTIN_WMIAWTTN, + + ARM_BUILTIN_WMERGE, + + ARM_BUILTIN_CRC32B, + ARM_BUILTIN_CRC32H, + ARM_BUILTIN_CRC32W, + ARM_BUILTIN_CRC32CB, + ARM_BUILTIN_CRC32CH, + ARM_BUILTIN_CRC32CW, + +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#define CRYPTO1(L, U, M1, M2) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO2(L, U, M1, M2, M3) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO3(L, U, M1, M2, M3, M4) \ + ARM_BUILTIN_CRYPTO_##U, + +#include "crypto.def" + +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#include "arm_neon_builtins.def" + + ,ARM_BUILTIN_MAX +}; + +#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + +static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX]; + +#define NUM_DREG_TYPES 5 +#define NUM_QREG_TYPES 6 + +static void +arm_init_neon_builtins (void) +{ + unsigned int i, fcode; + tree decl; + + tree neon_intQI_type_node; + tree neon_intHI_type_node; + tree neon_floatHF_type_node; + tree neon_polyQI_type_node; + tree neon_polyHI_type_node; + tree neon_intSI_type_node; + tree neon_intDI_type_node; + tree neon_intUTI_type_node; + tree neon_float_type_node; + + tree intQI_pointer_node; + tree intHI_pointer_node; + tree intSI_pointer_node; + tree 
intDI_pointer_node; + tree float_pointer_node; + + tree const_intQI_node; + tree const_intHI_node; + tree const_intSI_node; + tree const_intDI_node; + tree const_float_node; + + tree const_intQI_pointer_node; + tree const_intHI_pointer_node; + tree const_intSI_pointer_node; + tree const_intDI_pointer_node; + tree const_float_pointer_node; + + tree V8QI_type_node; + tree V4HI_type_node; + tree V4HF_type_node; + tree V2SI_type_node; + tree V2SF_type_node; + tree V16QI_type_node; + tree V8HI_type_node; + tree V4SI_type_node; + tree V4SF_type_node; + tree V2DI_type_node; + + tree intUQI_type_node; + tree intUHI_type_node; + tree intUSI_type_node; + tree intUDI_type_node; + + tree intEI_type_node; + tree intOI_type_node; + tree intCI_type_node; + tree intXI_type_node; + + tree V8QI_pointer_node; + tree V4HI_pointer_node; + tree V2SI_pointer_node; + tree V2SF_pointer_node; + tree V16QI_pointer_node; + tree V8HI_pointer_node; + tree V4SI_pointer_node; + tree V4SF_pointer_node; + tree V2DI_pointer_node; + + tree void_ftype_pv8qi_v8qi_v8qi; + tree void_ftype_pv4hi_v4hi_v4hi; + tree void_ftype_pv2si_v2si_v2si; + tree void_ftype_pv2sf_v2sf_v2sf; + tree void_ftype_pdi_di_di; + tree void_ftype_pv16qi_v16qi_v16qi; + tree void_ftype_pv8hi_v8hi_v8hi; + tree void_ftype_pv4si_v4si_v4si; + tree void_ftype_pv4sf_v4sf_v4sf; + tree void_ftype_pv2di_v2di_v2di; + + tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; + tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; + tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; + + /* Create distinguished type nodes for NEON vector element types, + and pointers to values of such types, so we can detect them later. */ + neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode)); + neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode)); + neon_float_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; + layout_type (neon_float_type_node); + neon_floatHF_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode); + layout_type (neon_floatHF_type_node); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by arm_mangle_type too. 
*/ + (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, + "__builtin_neon_qi"); + (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, + "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node, + "__builtin_neon_hf"); + (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, + "__builtin_neon_si"); + (*lang_hooks.types.register_builtin_type) (neon_float_type_node, + "__builtin_neon_sf"); + (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, + "__builtin_neon_di"); + (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, + "__builtin_neon_poly8"); + (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, + "__builtin_neon_poly16"); + + intQI_pointer_node = build_pointer_type (neon_intQI_type_node); + intHI_pointer_node = build_pointer_type (neon_intHI_type_node); + intSI_pointer_node = build_pointer_type (neon_intSI_type_node); + intDI_pointer_node = build_pointer_type (neon_intDI_type_node); + float_pointer_node = build_pointer_type (neon_float_type_node); + + /* Next create constant-qualified versions of the above types. */ + const_intQI_node = build_qualified_type (neon_intQI_type_node, + TYPE_QUAL_CONST); + const_intHI_node = build_qualified_type (neon_intHI_type_node, + TYPE_QUAL_CONST); + const_intSI_node = build_qualified_type (neon_intSI_type_node, + TYPE_QUAL_CONST); + const_intDI_node = build_qualified_type (neon_intDI_type_node, + TYPE_QUAL_CONST); + const_float_node = build_qualified_type (neon_float_type_node, + TYPE_QUAL_CONST); + + const_intQI_pointer_node = build_pointer_type (const_intQI_node); + const_intHI_pointer_node = build_pointer_type (const_intHI_node); + const_intSI_pointer_node = build_pointer_type (const_intSI_node); + const_intDI_pointer_node = build_pointer_type (const_intDI_node); + const_float_pointer_node = build_pointer_type (const_float_node); + + /* Now create vector types based on our NEON element types. */ + /* 64-bit vectors. */ + V8QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V8QImode); + V4HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V4HF_type_node = + build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); + V2SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V2SImode); + V2SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V2SFmode); + /* 128-bit vectors. */ + V16QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V16QImode); + V8HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V8HImode); + V4SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V4SImode); + V4SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V4SFmode); + V2DI_type_node = + build_vector_type_for_mode (neon_intDI_type_node, V2DImode); + + /* Unsigned integer types for various mode sizes. 
*/ + intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); + intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); + intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); + intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); + neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); + + + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + "__builtin_neon_uqi"); + (*lang_hooks.types.register_builtin_type) (intUHI_type_node, + "__builtin_neon_uhi"); + (*lang_hooks.types.register_builtin_type) (intUSI_type_node, + "__builtin_neon_usi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_udi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_poly64"); + (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node, + "__builtin_neon_poly128"); + + /* Opaque integer types for structures of vectors. */ + intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); + intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); + intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); + intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); + + (*lang_hooks.types.register_builtin_type) (intTI_type_node, + "__builtin_neon_ti"); + (*lang_hooks.types.register_builtin_type) (intEI_type_node, + "__builtin_neon_ei"); + (*lang_hooks.types.register_builtin_type) (intOI_type_node, + "__builtin_neon_oi"); + (*lang_hooks.types.register_builtin_type) (intCI_type_node, + "__builtin_neon_ci"); + (*lang_hooks.types.register_builtin_type) (intXI_type_node, + "__builtin_neon_xi"); + + /* Pointers to vector types. */ + V8QI_pointer_node = build_pointer_type (V8QI_type_node); + V4HI_pointer_node = build_pointer_type (V4HI_type_node); + V2SI_pointer_node = build_pointer_type (V2SI_type_node); + V2SF_pointer_node = build_pointer_type (V2SF_type_node); + V16QI_pointer_node = build_pointer_type (V16QI_type_node); + V8HI_pointer_node = build_pointer_type (V8HI_type_node); + V4SI_pointer_node = build_pointer_type (V4SI_type_node); + V4SF_pointer_node = build_pointer_type (V4SF_type_node); + V2DI_pointer_node = build_pointer_type (V2DI_type_node); + + /* Operations which return results as pairs. 
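+   These void (T *, T, T) signatures are used below for the NEON_RESULTPAIR
+   builtins, which return two vectors by writing them through the pointer
+   operand (for instance the vtrn/vzip/vuzp family).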
*/ + void_ftype_pv8qi_v8qi_v8qi = + build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, + V8QI_type_node, NULL); + void_ftype_pv4hi_v4hi_v4hi = + build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, + V4HI_type_node, NULL); + void_ftype_pv2si_v2si_v2si = + build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, + V2SI_type_node, NULL); + void_ftype_pv2sf_v2sf_v2sf = + build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, + V2SF_type_node, NULL); + void_ftype_pdi_di_di = + build_function_type_list (void_type_node, intDI_pointer_node, + neon_intDI_type_node, neon_intDI_type_node, NULL); + void_ftype_pv16qi_v16qi_v16qi = + build_function_type_list (void_type_node, V16QI_pointer_node, + V16QI_type_node, V16QI_type_node, NULL); + void_ftype_pv8hi_v8hi_v8hi = + build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, + V8HI_type_node, NULL); + void_ftype_pv4si_v4si_v4si = + build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, + V4SI_type_node, NULL); + void_ftype_pv4sf_v4sf_v4sf = + build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, + V4SF_type_node, NULL); + void_ftype_pv2di_v2di_v2di = + build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, + V2DI_type_node, NULL); + + if (TARGET_CRYPTO && TARGET_HARD_FLOAT) + { + tree V4USI_type_node = + build_vector_type_for_mode (intUSI_type_node, V4SImode); + + tree V16UQI_type_node = + build_vector_type_for_mode (intUQI_type_node, V16QImode); + + tree v16uqi_ftype_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE); + + tree v16uqi_ftype_v16uqi_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, + V16UQI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree uti_ftype_udi_udi + = build_function_type_list (neon_intUTI_type_node, intUDI_type_node, + intUDI_type_node, NULL_TREE); + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef CF + #undef FT1 + #undef FT2 + #undef FT3 + + #define C(U) \ + ARM_BUILTIN_CRYPTO_##U + #define N(L) \ + "__builtin_arm_crypto_"#L + #define FT1(R, A) \ + R##_ftype_##A + #define FT2(R, A1, A2) \ + R##_ftype_##A1##_##A2 + #define FT3(R, A1, A2, A3) \ + R##_ftype_##A1##_##A2##_##A3 + #define CRYPTO1(L, U, R, A) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #define CRYPTO2(L, U, R, A1, A2) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + + #define CRYPTO3(L, U, R, A1, A2, A3) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #include "crypto.def" + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef FT1 + #undef FT2 + #undef FT3 + } + dreg_types[0] = V8QI_type_node; + dreg_types[1] = V4HI_type_node; + dreg_types[2] = V2SI_type_node; + dreg_types[3] = V2SF_type_node; + dreg_types[4] = neon_intDI_type_node; + + qreg_types[0] = V16QI_type_node; + qreg_types[1] = 
V8HI_type_node; + qreg_types[2] = V4SI_type_node; + qreg_types[3] = V4SF_type_node; + qreg_types[4] = V2DI_type_node; + qreg_types[5] = neon_intUTI_type_node; + + for (i = 0; i < NUM_QREG_TYPES; i++) + { + int j; + for (j = 0; j < NUM_QREG_TYPES; j++) + { + if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES) + reinterp_ftype_dreg[i][j] + = build_function_type_list (dreg_types[i], dreg_types[j], NULL); + + reinterp_ftype_qreg[i][j] + = build_function_type_list (qreg_types[i], qreg_types[j], NULL); + } + } + + for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; + i < ARRAY_SIZE (neon_builtin_data); + i++, fcode++) + { + neon_builtin_datum *d = &neon_builtin_data[i]; + + const char* const modenames[] = { + "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", + "ti", "ei", "oi" + }; + char namebuf[60]; + tree ftype = NULL; + int is_load = 0, is_store = 0; + + gcc_assert (ARRAY_SIZE (modenames) == T_MAX); + + d->fcode = fcode; + + switch (d->itype) + { + case NEON_LOAD1: + case NEON_LOAD1LANE: + case NEON_LOADSTRUCT: + case NEON_LOADSTRUCTLANE: + is_load = 1; + /* Fall through. */ + case NEON_STORE1: + case NEON_STORE1LANE: + case NEON_STORESTRUCT: + case NEON_STORESTRUCTLANE: + if (!is_load) + is_store = 1; + /* Fall through. */ + case NEON_UNOP: + case NEON_RINT: + case NEON_BINOP: + case NEON_LOGICBINOP: + case NEON_SHIFTINSERT: + case NEON_TERNOP: + case NEON_GETLANE: + case NEON_SETLANE: + case NEON_CREATE: + case NEON_DUP: + case NEON_DUPLANE: + case NEON_SHIFTIMM: + case NEON_SHIFTACC: + case NEON_COMBINE: + case NEON_SPLIT: + case NEON_CONVERT: + case NEON_FIXCONV: + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + case NEON_LANEMAC: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SCALARMAC: + case NEON_SELECT: + case NEON_VTBL: + case NEON_VTBX: + { + int k; + tree return_type = void_type_node, args = void_list_node; + + /* Build a function type directly from the insn_data for + this builtin. The build_function_type() function takes + care of removing duplicates for us. */ + for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--) + { + tree eltype; + + if (is_load && k == 1) + { + /* Neon load patterns always have the memory + operand in the operand 1 position. */ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); + + switch (d->mode) + { + case T_V8QI: + case T_V16QI: + eltype = const_intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = const_intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = const_intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = const_float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = const_intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else if (is_store && k == 0) + { + /* Similarly, Neon store patterns use operand 0 as + the memory location to store to. 
*/ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); + + switch (d->mode) + { + case T_V8QI: + case T_V16QI: + eltype = intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else + { + switch (insn_data[d->code].operand[k].mode) + { + case VOIDmode: eltype = void_type_node; break; + /* Scalars. */ + case QImode: eltype = neon_intQI_type_node; break; + case HImode: eltype = neon_intHI_type_node; break; + case SImode: eltype = neon_intSI_type_node; break; + case SFmode: eltype = neon_float_type_node; break; + case DImode: eltype = neon_intDI_type_node; break; + case TImode: eltype = intTI_type_node; break; + case EImode: eltype = intEI_type_node; break; + case OImode: eltype = intOI_type_node; break; + case CImode: eltype = intCI_type_node; break; + case XImode: eltype = intXI_type_node; break; + /* 64-bit vectors. */ + case V8QImode: eltype = V8QI_type_node; break; + case V4HImode: eltype = V4HI_type_node; break; + case V2SImode: eltype = V2SI_type_node; break; + case V2SFmode: eltype = V2SF_type_node; break; + /* 128-bit vectors. */ + case V16QImode: eltype = V16QI_type_node; break; + case V8HImode: eltype = V8HI_type_node; break; + case V4SImode: eltype = V4SI_type_node; break; + case V4SFmode: eltype = V4SF_type_node; break; + case V2DImode: eltype = V2DI_type_node; break; + default: gcc_unreachable (); + } + } + + if (k == 0 && !is_store) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + } + break; + + case NEON_RESULTPAIR: + { + switch (insn_data[d->code].operand[1].mode) + { + case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; + case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; + case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; + case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; + case DImode: ftype = void_ftype_pdi_di_di; break; + case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; + case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; + case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; + case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; + case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; + default: gcc_unreachable (); + } + } + break; + + case NEON_REINTERP: + { + /* We iterate over NUM_DREG_TYPES doubleword types, + then NUM_QREG_TYPES quadword types. + V4HF is not a type used in reinterpret, so we translate + d->mode to the correct index in reinterp_ftype_dreg. */ + bool qreg_p + = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8; + int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 
1 : 0)) + % NUM_QREG_TYPES; + switch (insn_data[d->code].operand[0].mode) + { + case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; + case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; + case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; + case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; + case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; + case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; + case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; + case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; + case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; + case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; + case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break; + default: gcc_unreachable (); + } + } + break; + case NEON_FLOAT_WIDEN: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + + switch (insn_data[d->code].operand[1].mode) + { + case V4HFmode: + eltype = V4HF_type_node; + return_type = V4SF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + case NEON_FLOAT_NARROW: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + + switch (insn_data[d->code].operand[1].mode) + { + case V4SFmode: + eltype = V4SF_type_node; + return_type = V4HF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + default: + gcc_unreachable (); + } + + gcc_assert (ftype != NULL); + + sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); + + decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, + NULL_TREE); + arm_builtin_decls[fcode] = decl; + } +} + +#undef NUM_DREG_TYPES +#undef NUM_QREG_TYPES + +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if ((MASK) & insn_flags) \ + { \ + tree bdecl; \ + bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \ + BUILT_IN_MD, NULL, NULL_TREE); \ + arm_builtin_decls[CODE] = bdecl; \ + } \ + } \ + while (0) + +struct builtin_description +{ + const unsigned int mask; + const enum insn_code icode; + const char * const name; + const enum arm_builtins code; + const enum rtx_code comparison; + const unsigned int flag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ +#define IWMMXT_BUILTIN(code, string, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ + ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + +#define IWMMXT2_BUILTIN(code, string, builtin) \ + { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \ + ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) + IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) + IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) + IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) + IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) + IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) + IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) + IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) + IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) + IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) + IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) + IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) + IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) + IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) + IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) + IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) + IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) + IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) + IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) + 
IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) + IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) + IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) + IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) + IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) + IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) + IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) + IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) + IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) + IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) + IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) + IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) + IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) + IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) + IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) + IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) + IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) + IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) + IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) + IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) + IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) + IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) + IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) + IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) + IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) + IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) + IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) + IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) + IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) + IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) + IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) + IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) + IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) + IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) + IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) + IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) + IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) + IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX) + IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW) + IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4) + IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R) + IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM) + IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM) + IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR) + IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR) + IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL) + IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR) + IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR) + IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM) + IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR) + IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM) + IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR) + IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0) + IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1) + IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2) + IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3) + +#define IWMMXT_BUILTIN2(code, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + +#define IWMMXT2_BUILTIN2(code, builtin) \ + { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM) + IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL) + IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwss, 
WPACKWSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) + IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) + IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + +#define CRC32_BUILTIN(L, U) \ + {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ + UNKNOWN, 0}, + CRC32_BUILTIN (crc32b, CRC32B) + CRC32_BUILTIN (crc32h, CRC32H) + CRC32_BUILTIN (crc32w, CRC32W) + CRC32_BUILTIN (crc32cb, CRC32CB) + CRC32_BUILTIN (crc32ch, CRC32CH) + CRC32_BUILTIN (crc32cw, CRC32CW) +#undef CRC32_BUILTIN + + +#define CRYPTO_BUILTIN(L, U) \ + {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \ + UNKNOWN, 0}, +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 +#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +}; + +static const struct builtin_description bdesc_1arg[] = +{ + IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) + IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) + IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) + IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) + IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) + IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) + IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) + IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) + IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) + IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) + IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) + IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) + IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) + IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) + IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) + IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) + IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) + IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) + IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB) + IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH) + IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW) + IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB) + IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH) + IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW) + +#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U) +#define CRYPTO2(L, U, R, A1, A2) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 +}; + +static const struct builtin_description bdesc_3arg[] = +{ +#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO2(L, U, R, A1, A2) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + }; +#undef CRYPTO_BUILTIN + +/* Set up all the iWMMXt builtins. This is not called if + TARGET_IWMMXT is zero. 
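+   The builtins below are all registered through def_mbuiltin (directly or
+   via the iwmmx_mbuiltin / iwmmx2_mbuiltin wrappers), so a declaration is
+   only created when the matching FL_IWMMXT or FL_IWMMXT2 bit is set in
+   insn_flags.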
*/ + +static void +arm_init_iwmmxt_builtins (void) +{ + const struct builtin_description * d; + size_t i; + + tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); + tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); + + tree v8qi_ftype_v8qi_v8qi_int + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, + integer_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_int + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); + tree v2si_ftype_v2si_int + = build_function_type_list (V2SI_type_node, + V2SI_type_node, integer_type_node, NULL_TREE); + tree v2si_ftype_di_di + = build_function_type_list (V2SI_type_node, + long_long_integer_type_node, + long_long_integer_type_node, + NULL_TREE); + tree di_ftype_di_int + = build_function_type_list (long_long_integer_type_node, + long_long_integer_type_node, + integer_type_node, NULL_TREE); + tree di_ftype_di_int_int + = build_function_type_list (long_long_integer_type_node, + long_long_integer_type_node, + integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_v8qi + = build_function_type_list (integer_type_node, + V8QI_type_node, NULL_TREE); + tree int_ftype_v4hi + = build_function_type_list (integer_type_node, + V4HI_type_node, NULL_TREE); + tree int_ftype_v2si + = build_function_type_list (integer_type_node, + V2SI_type_node, NULL_TREE); + tree int_ftype_v8qi_int + = build_function_type_list (integer_type_node, + V8QI_type_node, integer_type_node, NULL_TREE); + tree int_ftype_v4hi_int + = build_function_type_list (integer_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); + tree int_ftype_v2si_int + = build_function_type_list (integer_type_node, + V2SI_type_node, integer_type_node, NULL_TREE); + tree v8qi_ftype_v8qi_int_int + = build_function_type_list (V8QI_type_node, + V8QI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_int_int + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2si_ftype_v2si_int_int + = build_function_type_list (V2SI_type_node, + V2SI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + /* Miscellaneous. 
*/ + tree v8qi_ftype_v4hi_v4hi + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v2si_v2si + = build_function_type_list (V4HI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree v8qi_ftype_v4hi_v8qi + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V8QI_type_node, NULL_TREE); + tree v2si_ftype_v4hi_v4hi + = build_function_type_list (V2SI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v2si_ftype_v8qi_v8qi + = build_function_type_list (V2SI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_di + = build_function_type_list (V4HI_type_node, + V4HI_type_node, long_long_integer_type_node, + NULL_TREE); + tree v2si_ftype_v2si_di + = build_function_type_list (V2SI_type_node, + V2SI_type_node, long_long_integer_type_node, + NULL_TREE); + tree di_ftype_void + = build_function_type_list (long_long_unsigned_type_node, NULL_TREE); + tree int_ftype_void + = build_function_type_list (integer_type_node, NULL_TREE); + tree di_ftype_v8qi + = build_function_type_list (long_long_integer_type_node, + V8QI_type_node, NULL_TREE); + tree di_ftype_v4hi + = build_function_type_list (long_long_integer_type_node, + V4HI_type_node, NULL_TREE); + tree di_ftype_v2si + = build_function_type_list (long_long_integer_type_node, + V2SI_type_node, NULL_TREE); + tree v2si_ftype_v4hi + = build_function_type_list (V2SI_type_node, + V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v8qi + = build_function_type_list (V4HI_type_node, + V8QI_type_node, NULL_TREE); + tree v8qi_ftype_v8qi + = build_function_type_list (V8QI_type_node, + V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node, NULL_TREE); + tree v2si_ftype_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, NULL_TREE); + + tree di_ftype_di_v4hi_v4hi + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + V4HI_type_node, V4HI_type_node, + NULL_TREE); + + tree di_ftype_v4hi_v4hi + = build_function_type_list (long_long_unsigned_type_node, + V4HI_type_node,V4HI_type_node, + NULL_TREE); + + tree v2si_ftype_v2si_v4hi_v4hi + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V4HI_type_node, + V4HI_type_node, NULL_TREE); + + tree v2si_ftype_v2si_v8qi_v8qi + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V8QI_type_node, + V8QI_type_node, NULL_TREE); + + tree di_ftype_di_v2si_v2si + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + V2SI_type_node, V2SI_type_node, + NULL_TREE); + + tree di_ftype_di_di_int + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + integer_type_node, NULL_TREE); + + tree void_ftype_int + = build_function_type_list (void_type_node, + integer_type_node, NULL_TREE); + + tree v8qi_ftype_char + = build_function_type_list (V8QI_type_node, + signed_char_type_node, NULL_TREE); + + tree v4hi_ftype_short + = build_function_type_list (V4HI_type_node, + short_integer_type_node, NULL_TREE); + + tree v2si_ftype_int + = build_function_type_list (V2SI_type_node, + integer_type_node, NULL_TREE); + + /* Normal vector binops. 
*/ + tree v8qi_ftype_v8qi_v8qi + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node,V4HI_type_node, NULL_TREE); + tree v2si_ftype_v2si_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree di_ftype_di_di + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + + /* Add all builtins that are more or less simple operations on two + operands. */ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + { + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; + tree type; + + if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2)) + continue; + + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V8QImode: + type = v8qi_ftype_v8qi_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si_v2si; + break; + case DImode: + type = di_ftype_di_di; + break; + + default: + gcc_unreachable (); + } + + def_mbuiltin (d->mask, d->name, type, d->code); + } + + /* Add the remaining MMX insns with somewhat more complicated types. */ +#define iwmmx_mbuiltin(NAME, TYPE, CODE) \ + def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \ + ARM_BUILTIN_ ## CODE) + +#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \ + def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \ + ARM_BUILTIN_ ## CODE) + + iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO); + iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0); + iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1); + iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2); + iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3); + iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0); + iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1); + iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2); + iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3); + + iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH); + iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW); + iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD); + iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI); + iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI); + iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI); + + iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH); + iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW); + iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD); + iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI); + iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI); + iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI); + + iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH); + iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW); + iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD); + iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI); + iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI); + iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI); + + iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH); + iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW); + iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD); + iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI); + iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI); + iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI); + + iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH); + + iwmmx_mbuiltin 
("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB); + iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH); + iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS); + iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX); + iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN); + iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU); + iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX); + iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN); + iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ); + iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ); + + iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB); + iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH); + iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW); + iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB); + iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH); + iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW); + iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB); + iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH); + iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW); + + iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB); + iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH); + iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW); + + iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB); + iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH); + iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW); + + iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM); + iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL); + + iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS); + iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS); + iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS); + iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS); + iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS); + iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS); + + iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB); + iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH); + iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW); + iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB); + iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH); + iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW); + iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB); + iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH); + iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW); + iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB); + iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH); + iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW); + + iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS); + iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ); + iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU); + iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ); + + iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI); + iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA); + iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH); + iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB); + iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT); + iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB); + iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT); + + iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB); + iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH); + iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW); + + iwmmx2_mbuiltin ("wqmiabb", 
v2si_ftype_v2si_v4hi_v4hi, WQMIABB); + iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT); + iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB); + iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT); + + iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN); + iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN); + iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN); + iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN); + + iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB); + iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT); + iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB); + iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT); + + iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN); + iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN); + iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN); + iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN); + + iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB); + iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT); + iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB); + iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT); + + iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN); + iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN); + iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN); + iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN); + + iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE); + + iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB); + iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH); + iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW); + +#undef iwmmx_mbuiltin +#undef iwmmx2_mbuiltin +} + +static void +arm_init_fp16_builtins (void) +{ + tree fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (fp16_type) = 16; + layout_type (fp16_type); + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); +} + +static void +arm_init_crc32_builtins () +{ + tree si_ftype_si_qi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intQI_type_node, NULL_TREE); + tree si_ftype_si_hi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intHI_type_node, NULL_TREE); + tree si_ftype_si_si + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intSI_type_node, NULL_TREE); + + arm_builtin_decls[ARM_BUILTIN_CRC32B] + = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi, + ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32H] + = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi, + ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32W] + = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si, + ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CB] + = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi, + ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CH] + = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi, + ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CW] + = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si, + ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE); +} + +static void +arm_init_builtins (void) +{ + if (TARGET_REALLY_IWMMXT) 
+ arm_init_iwmmxt_builtins (); + + if (TARGET_NEON) + arm_init_neon_builtins (); + + if (arm_fp16_format) + arm_init_fp16_builtins (); + + if (TARGET_CRC32) + arm_init_crc32_builtins (); +} + +/* Return the ARM builtin for CODE. */ + +static tree +arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ARM_BUILTIN_MAX) + return error_mark_node; + + return arm_builtin_decls[code]; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_parameter_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("function parameters cannot have __fp16 type"); + return NULL; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_return_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("functions cannot return __fp16 type"); + return NULL; +} + +/* Implement TARGET_PROMOTED_TYPE. */ + +static tree +arm_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} + +/* Implement TARGET_CONVERT_TO_TYPE. + Specifically, this hook implements the peculiarity of the ARM + half-precision floating-point C semantics that requires conversions between + __fp16 to or from double to do an intermediate conversion to float. */ + +static tree +arm_convert_to_type (tree type, tree expr) +{ + tree fromtype = TREE_TYPE (expr); + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) + return NULL_TREE; + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) + return convert (type, convert (float_type_node, expr)); + return NULL_TREE; +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. + This simply adds HFmode as a supported mode; even though we don't + implement arithmetic on this type directly, it's supported by + optabs conversions, much the way the double-word arithmetic is + special-cased in the default hook. */ + +static bool +arm_scalar_mode_supported_p (enum machine_mode mode) +{ + if (mode == HFmode) + return (arm_fp16_format != ARM_FP16_FORMAT_NONE); + else if (ALL_FIXED_POINT_MODE_P (mode)) + return true; + else + return default_scalar_mode_supported_p (mode); +} + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ + +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (mode); + + emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x + : gen_rtx_SUBREG (DImode, x, 0))); + return x; +} + +/* Function to expand ternary builtins. */ +static rtx +arm_expand_ternop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = NULL_RTX; + + /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select + lane operand depending on endianness. 
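[Editorial aside, not part of the patch: the __fp16 hooks above (arm_promoted_type and arm_convert_to_type) implement ARM's half-precision C semantics, in which __fp16 promotes to float and any conversion between __fp16 and double is performed through an intermediate float. A minimal sketch of what a widening conversion amounts to; the function name widen_half is invented, and the snippet assumes a toolchain with __fp16 enabled, for example with -mfp16-format=ieee.]

  /* Illustrative only: what a plain "return h;" is lowered to by the
     conversion hook above.  */
  double widen_half (__fp16 h)
  {
    return (double) (float) h;   /* __fp16 -> float -> double */
  }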
*/ + bool builtin_sha1cpm_p = false; + + if (insn_data[icode].n_operands == 5) + { + gcc_assert (icode == CODE_FOR_crypto_sha1c + || icode == CODE_FOR_crypto_sha1p + || icode == CODE_FOR_crypto_sha1m); + builtin_sha1cpm_p = true; + } + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode mode2 = insn_data[icode].operand[3].mode; + + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + if (VECTOR_MODE_P (mode2)) + op2 = safe_vector_operand (op2, mode2); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) + && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (builtin_sha1cpm_p) + op3 = GEN_INT (TARGET_BIG_END ? 1 : 0); + + if (builtin_sha1cpm_p) + pat = GEN_FCN (icode) (target, op0, op1, op2, op3); + else + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of arm_expand_builtin to take care of binop insns. */ + +static rtx +arm_expand_binop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of arm_expand_builtin to take care of unop insns. */ + +static rtx +arm_expand_unop_builtin (enum insn_code icode, + tree exp, rtx target, int do_load) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + rtx op1 = NULL_RTX; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + bool builtin_sha1h_p = false; + + if (insn_data[icode].n_operands == 3) + { + gcc_assert (icode == CODE_FOR_crypto_sha1h); + builtin_sha1h_p = true; + } + + if (! target + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + if (do_load) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + else + { + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + if (builtin_sha1h_p) + op1 = GEN_INT (TARGET_BIG_END ? 1 : 0); + + if (builtin_sha1h_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, + NEON_ARG_MEMORY, + NEON_ARG_STOP +} builtin_arg; + +#define NEON_MAX_BUILTIN_ARGS 5 + +/* EXP is a pointer argument to a Neon load or store intrinsic. Derive + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has + mode REG_MODE. This block contains vectors of type TYPE_MODE. The + function references the memory at EXP of type TYPE and in mode + MEM_MODE; this mode may be BLKmode if no more suitable mode is + available. */ + +static tree +neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode, + enum machine_mode reg_mode, + neon_builtin_type_mode type_mode) +{ + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; + + /* Work out the size of the register block in bytes. */ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ + gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG)); + vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); + nvectors = reg_size / vector_size; + + /* Work out the type of each element. */ + gcc_assert (POINTER_TYPE_P (type)); + elem_type = TREE_TYPE (type); + + /* Work out how many elements are being loaded or stored. + MEM_MODE == REG_MODE implies a one-to-one mapping between register + and memory elements; anything else implies a lane load or store. */ + if (mem_mode == reg_mode) + nelems = vector_size * nvectors / int_size_in_bytes (elem_type); + else + nelems = nvectors; + + /* Create a type that describes the full access. */ + upper_bound = build_int_cst (size_type_node, nelems - 1); + array_type = build_array_type (elem_type, build_index_type (upper_bound)); + + /* Dereference EXP using that type. */ + return fold_build2 (MEM_REF, array_type, exp, + build_int_cst (build_pointer_type (array_type), 0)); +} + +/* Expand a Neon builtin. */ +static rtx +arm_expand_neon_args (rtx target, int icode, int have_retval, + neon_builtin_type_mode type_mode, + tree exp, int fcode, ...) 
+{ + va_list ap; + rtx pat; + tree arg[NEON_MAX_BUILTIN_ARGS]; + rtx op[NEON_MAX_BUILTIN_ARGS]; + tree arg_type; + tree formals; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode other_mode; + int argc = 0; + int opno; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, fcode); + + formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode])); + + for (;;) + { + builtin_arg thisarg = (builtin_arg) va_arg (ap, int); + + if (thisarg == NEON_ARG_STOP) + break; + else + { + opno = argc + have_retval; + mode[argc] = insn_data[icode].operand[opno].mode; + arg[argc] = CALL_EXPR_ARG (exp, argc); + arg_type = TREE_VALUE (formals); + if (thisarg == NEON_ARG_MEMORY) + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], arg_type, + mode[argc], other_mode, + type_mode); + } + + /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P + be returned. */ + op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode, + (thisarg == NEON_ARG_MEMORY + ? EXPAND_MEMORY : EXPAND_NORMAL)); + + switch (thisarg) + { + case NEON_ARG_COPY_TO_REG: + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case NEON_ARG_CONSTANT: + /* FIXME: This error message is somewhat unhelpful. */ + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + error ("argument must be a constant"); + break; + + case NEON_ARG_MEMORY: + /* Check if expand failed. */ + if (op[argc] == const0_rtx) + return 0; + gcc_assert (MEM_P (op[argc])); + PUT_MODE (op[argc], mode[argc]); + /* ??? arm_neon.h uses the same built-in functions for signed + and unsigned accesses, casting where necessary. This isn't + alias safe. */ + set_mem_alias_set (op[argc], 0); + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = (replace_equiv_address + (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); + break; + + case NEON_ARG_STOP: + gcc_unreachable (); + } + + argc++; + formals = TREE_CHAIN (formals); + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand a Neon builtin. These are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. 
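[Editorial aside, not part of the patch: arm_expand_neon_args above consumes a variadic list of argument-kind codes terminated by NEON_ARG_STOP. Below is a self-contained sketch of the same sentinel-terminated va_arg walk; the enum and the function count_operands are invented names for illustration only.]

  #include <stdarg.h>

  enum arg_kind { ARG_REG, ARG_CONST, ARG_MEM, ARG_STOP };

  /* Count operand codes until the ARG_STOP sentinel, the way the expander
     above walks and classifies its operands.  */
  static int count_operands (int first, ...)
  {
    va_list ap;
    int n = 0;
    va_start (ap, first);
    for (int kind = first; kind != ARG_STOP; kind = va_arg (ap, int))
      n++;
    va_end (ap);
    return n;
  }

  /* count_operands (ARG_REG, ARG_CONST, ARG_STOP) returns 2.  */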
*/ +static rtx +arm_expand_neon_builtin (int fcode, tree exp, rtx target) +{ + neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; + neon_itype itype = d->itype; + enum insn_code icode = d->code; + neon_builtin_type_mode type_mode = d->mode; + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: + case NEON_SETLANE: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_RINT: + case NEON_SPLIT: + case NEON_FLOAT_WIDEN: + case NEON_FLOAT_NARROW: + case NEON_REINTERP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case 
NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + + gcc_unreachable (); +} + +/* Emit code to reinterpret one Neon type as another, without altering bits. */ +void +neon_reinterpret (rtx dest, rtx src) +{ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); +} + +/* Emit code to place a Neon pair result in memory locations (with equal + registers). */ +void +neon_emit_pair_result_insn (enum machine_mode mode, + rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, + rtx op1, rtx op2) +{ + rtx mem = gen_rtx_MEM (mode, destaddr); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (intfn (tmp1, op1, op2, tmp2)); + + emit_move_insn (mem, tmp1); + mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); + emit_move_insn (mem, tmp2); +} + +/* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + + We assume that the operands described by SRC and DEST represent a + decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the + number of components into which the copy has been decomposed. */ +void +neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count) +{ + unsigned int i; + + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) < REGNO (operands[1])) + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[i]; + operands[2 * i + 1] = src[i]; + } + } + else + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[count - i - 1]; + operands[2 * i + 1] = src[count - i - 1]; + } + } +} + +/* Split operands into moves from op[1] + op[2] into op[0]. */ + +void +neon_split_vcombine (rtx operands[3]) +{ + unsigned int dest = REGNO (operands[0]); + unsigned int src1 = REGNO (operands[1]); + unsigned int src2 = REGNO (operands[2]); + enum machine_mode halfmode = GET_MODE (operands[1]); + unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); + rtx destlo, desthi; + + if (src1 == dest && src2 == dest + halfregs) + { + /* No-op move. Can't split to nothing; emit something. */ + emit_note (NOTE_INSN_DELETED); + return; + } + + /* Preserve register attributes for variable tracking. */ + destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); + desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, + GET_MODE_SIZE (halfmode)); + + /* Special case of reversed high/low parts. Use VSWP. */ + if (src2 == dest && src1 == dest + halfregs) + { + rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]); + rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y))); + return; + } + + if (!reg_overlap_mentioned_p (operands[2], destlo)) + { + /* Try to avoid unnecessary moves if part of the result + is in the right place already. */ + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + } + else + { + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
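[Editorial aside, not part of the patch: neon_disambiguate_copy above orders the component moves of a decomposed copy so that no destination overwrites a source that is still needed, the same forward-or-backward choice memmove makes for overlapping buffers. A stand-alone sketch of the idea on a plain array; ordered_copy is an invented name, and the pointer comparison here plays the role of the register-number comparison in the real code.]

  /* Copy COUNT elements from SRC to DEST in a direction that stays safe
     even when the two ranges overlap.  Illustrative only.  */
  static void ordered_copy (int *dest, const int *src, unsigned count)
  {
    if (dest <= src)
      for (unsigned i = 0; i < count; i++)       /* copy low to high  */
        dest[i] = src[i];
    else
      for (unsigned i = count; i-- > 0; )        /* copy high to low  */
        dest[i] = src[i];
  }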
*/ + +static rtx +arm_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + const struct builtin_description * d; + enum insn_code icode; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + rtx op0; + rtx op1; + rtx op2; + rtx pat; + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + size_t i; + enum machine_mode tmode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + int opint; + int selector; + int mask; + int imm; + + if (fcode >= ARM_BUILTIN_NEON_BASE) + return arm_expand_neon_builtin (fcode, exp, target); + + switch (fcode) + { + case ARM_BUILTIN_TEXTRMSB: + case ARM_BUILTIN_TEXTRMUB: + case ARM_BUILTIN_TEXTRMSH: + case ARM_BUILTIN_TEXTRMUH: + case ARM_BUILTIN_TEXTRMSW: + case ARM_BUILTIN_TEXTRMUW: + icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb + : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub + : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh + : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh + : CODE_FOR_iwmmxt_textrmw); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + /* @@@ better error message */ + error ("selector must be an immediate"); + return gen_reg_rtx (tmode); + } + + opint = INTVAL (op1); + if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB) + { + if (opint > 7 || opint < 0) + error ("the range of selector should be in 0 to 7"); + } + else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH) + { + if (opint > 3 || opint < 0) + error ("the range of selector should be in 0 to 3"); + } + else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */ + { + if (opint > 1 || opint < 0) + error ("the range of selector should be in 0 to 1"); + } + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WALIGNI: + /* If op2 is immediate, call walighi, else call walighr. 
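[Editorial aside, not part of the patch: the TEXTRM cases above extract one lane of a 64-bit iWMMXt vector, which is why the selector is range-checked against the lane count (0 to 7 for bytes, 0 to 3 for halfwords, 0 to 1 for words). The same operation restated as plain C on a generic GCC vector type; hv4 and extract_s16 are invented names, and the snippet assumes GCC's vector subscripting extension.]

  /* What __builtin_arm_textrmsh computes, restated with generic vector
     subscripting; the result is the sign-extended halfword.  */
  typedef short hv4 __attribute__ ((vector_size (8)));

  static int extract_s16 (hv4 v, int sel)
  {
    /* SEL must be 0..3, matching the range check in the expander above.  */
    return v[sel];
  }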
*/ + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (CONST_INT_P (op2)) + { + icode = CODE_FOR_iwmmxt_waligni; + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2)); + selector = INTVAL (op2); + if (selector > 7 || selector < 0) + error ("the range of selector should be in 0 to 7"); + } + else + { + icode = CODE_FOR_iwmmxt_walignr; + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + } + if (target == 0 + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (!pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_TINSRB: + case ARM_BUILTIN_TINSRH: + case ARM_BUILTIN_TINSRW: + case ARM_BUILTIN_WMERGE: + icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb + : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh + : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge + : CODE_FOR_iwmmxt_tinsrw); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + { + error ("selector must be an immediate"); + return const0_rtx; + } + if (icode == CODE_FOR_iwmmxt_wmerge) + { + selector = INTVAL (op2); + if (selector > 7 || selector < 0) + error ("the range of selector should be in 0 to 7"); + } + if ((icode == CODE_FOR_iwmmxt_tinsrb) + || (icode == CODE_FOR_iwmmxt_tinsrh) + || (icode == CODE_FOR_iwmmxt_tinsrw)) + { + mask = 0x01; + selector= INTVAL (op2); + if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7)) + error ("the range of selector should be in 0 to 7"); + else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3)) + error ("the range of selector should be in 0 to 3"); + else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1)) + error ("the range of selector should be in 0 to 1"); + mask <<= selector; + op2 = GEN_INT (mask); + } + if (target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_SETWCGR0: + case ARM_BUILTIN_SETWCGR1: + case ARM_BUILTIN_SETWCGR2: + case ARM_BUILTIN_SETWCGR3: + icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0 + : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1 + : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2 + : CODE_FOR_iwmmxt_setwcgr3); + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + if (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + pat = GEN_FCN (icode) (op0); + if (!pat) + return 0; + emit_insn (pat); + return 0; + + case ARM_BUILTIN_GETWCGR0: + case ARM_BUILTIN_GETWCGR1: + case ARM_BUILTIN_GETWCGR2: + case ARM_BUILTIN_GETWCGR3: + icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0 + : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1 + : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2 + : CODE_FOR_iwmmxt_getwcgr3); + tmode = insn_data[icode].operand[0].mode; + if (target == 0 + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target); + if (!pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WSHUFH: + icode = CODE_FOR_iwmmxt_wshufh; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + error ("mask must be an immediate"); + return const0_rtx; + } + selector = INTVAL (op1); + if (selector < 0 || selector > 255) + error ("the range of mask should be in 0 to 255"); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WMADDS: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target); + case ARM_BUILTIN_WMADDSX: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target); + case ARM_BUILTIN_WMADDSN: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target); + case ARM_BUILTIN_WMADDU: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target); + case ARM_BUILTIN_WMADDUX: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target); + case ARM_BUILTIN_WMADDUN: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target); + case ARM_BUILTIN_WSADBZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target); + case ARM_BUILTIN_WSADHZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target); + + /* Several three-argument builtins. 
*/ + case ARM_BUILTIN_WMACS: + case ARM_BUILTIN_WMACU: + case ARM_BUILTIN_TMIA: + case ARM_BUILTIN_TMIAPH: + case ARM_BUILTIN_TMIATT: + case ARM_BUILTIN_TMIATB: + case ARM_BUILTIN_TMIABT: + case ARM_BUILTIN_TMIABB: + case ARM_BUILTIN_WQMIABB: + case ARM_BUILTIN_WQMIABT: + case ARM_BUILTIN_WQMIATB: + case ARM_BUILTIN_WQMIATT: + case ARM_BUILTIN_WQMIABBN: + case ARM_BUILTIN_WQMIABTN: + case ARM_BUILTIN_WQMIATBN: + case ARM_BUILTIN_WQMIATTN: + case ARM_BUILTIN_WMIABB: + case ARM_BUILTIN_WMIABT: + case ARM_BUILTIN_WMIATB: + case ARM_BUILTIN_WMIATT: + case ARM_BUILTIN_WMIABBN: + case ARM_BUILTIN_WMIABTN: + case ARM_BUILTIN_WMIATBN: + case ARM_BUILTIN_WMIATTN: + case ARM_BUILTIN_WMIAWBB: + case ARM_BUILTIN_WMIAWBT: + case ARM_BUILTIN_WMIAWTB: + case ARM_BUILTIN_WMIAWTT: + case ARM_BUILTIN_WMIAWBBN: + case ARM_BUILTIN_WMIAWBTN: + case ARM_BUILTIN_WMIAWTBN: + case ARM_BUILTIN_WMIAWTTN: + case ARM_BUILTIN_WSADB: + case ARM_BUILTIN_WSADH: + icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs + : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu + : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia + : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph + : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb + : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt + : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb + : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt + : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb + : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt + : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb + : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt + : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn + : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn + : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn + : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn + : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb + : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt + : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb + : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt + : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn + : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn + : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn + : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn + : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb + : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt + : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb + : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt + : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn + : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn + : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn + : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn + : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb + : CODE_FOR_iwmmxt_wsadh); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WZERO: + target = gen_reg_rtx (DImode); + emit_insn (gen_iwmmxt_clrdi (target)); + return target; + + case ARM_BUILTIN_WSRLHI: + case ARM_BUILTIN_WSRLWI: + case ARM_BUILTIN_WSRLDI: + case ARM_BUILTIN_WSLLHI: + case ARM_BUILTIN_WSLLWI: + case ARM_BUILTIN_WSLLDI: + case ARM_BUILTIN_WSRAHI: + case ARM_BUILTIN_WSRAWI: + case ARM_BUILTIN_WSRADI: + case ARM_BUILTIN_WRORHI: + case ARM_BUILTIN_WRORWI: + case ARM_BUILTIN_WRORDI: + case ARM_BUILTIN_WSRLH: + case ARM_BUILTIN_WSRLW: + case ARM_BUILTIN_WSRLD: + case ARM_BUILTIN_WSLLH: + case ARM_BUILTIN_WSLLW: + case ARM_BUILTIN_WSLLD: + case ARM_BUILTIN_WSRAH: + case ARM_BUILTIN_WSRAW: + case ARM_BUILTIN_WSRAD: + case ARM_BUILTIN_WRORH: + case ARM_BUILTIN_WRORW: + case ARM_BUILTIN_WRORD: + icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt + : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt + : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt + : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3 + : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3 + : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3 + : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di + : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di + : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di + : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di + : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di + : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di + : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di + : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di + : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di + : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di + : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di + : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di + : CODE_FOR_nothing); + arg1 = CALL_EXPR_ARG (exp, 1); + op1 = expand_normal (arg1); + if (GET_MODE (op1) == VOIDmode) + { + imm = INTVAL (op1); + if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI + || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW) + && (imm < 0 || imm > 32)) + { + if (fcode == ARM_BUILTIN_WRORHI) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code."); + else if (fcode == ARM_BUILTIN_WRORWI) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code."); + else if (fcode == ARM_BUILTIN_WRORH) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code."); + else + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code."); + } + else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD) + && (imm < 0 || imm > 64)) + { + if (fcode == ARM_BUILTIN_WRORDI) + error ("the range of count should be in 0 to 64. 
please check the intrinsic _mm_rori_si64 in code."); + else + error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code."); + } + else if (imm < 0) + { + if (fcode == ARM_BUILTIN_WSRLHI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRLWI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRLDI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code."); + else if (fcode == ARM_BUILTIN_WSLLHI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSLLWI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSLLDI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRAHI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRAWI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRADI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRLH) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRLW) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRLD) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code."); + else if (fcode == ARM_BUILTIN_WSLLH) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSLLW) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSLLD) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRAH) + error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRAW) + error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code."); + else + error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code."); + } + } + return arm_expand_binop_builtin (icode, exp, target); + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_binop_builtin (d->icode, exp, target); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_unop_builtin (d->icode, exp, target, 0); + + for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_ternop_builtin (d->icode, exp, target); + + /* @@@ Should really do something sensible here. */ + return NULL_RTX; +} + +/* Return the number (counting from 0) of + the least significant set bit in MASK. 
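[Editorial aside, not part of the patch: number_of_first_bit_set, defined next, is simply a count-trailing-zeros operation. A tiny worked example using the GCC builtin; treating __builtin_ctz as a stand-in for ctz_hwi is an assumption made for illustration only.]

  /* Index of the least significant set bit: 0x48 is binary 1001000, so the
     answer is 3.  Illustrative only.  */
  #include <assert.h>
  int main (void)
  {
    assert (__builtin_ctz (0x48u) == 3);
    return 0;
  }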
*/ + +inline static int +number_of_first_bit_set (unsigned mask) +{ + return ctz_hwi (mask); +} + +/* Like emit_multi_reg_push, but allowing for a different set of + registers to be described as saved. MASK is the set of registers + to be saved; REAL_REGS is the set of registers to be described as + saved. If REAL_REGS is 0, only describe the stack adjustment. */ + +static rtx +thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs) +{ + unsigned long regno; + rtx par[10], tmp, reg, insn; + int i, j; + + /* Build the parallel of the registers actually being stored. */ + for (i = 0; mask; ++i, mask &= mask - 1) + { + regno = ctz_hwi (mask); + reg = gen_rtx_REG (SImode, regno); + + if (i == 0) + tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT); + else + tmp = gen_rtx_USE (VOIDmode, reg); + + par[i] = tmp; + } + + tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + tmp = gen_frame_mem (BLKmode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, par[0]); + par[0] = tmp; + + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par)); + insn = emit_insn (tmp); + + /* Always build the stack adjustment note for unwind info. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i); + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); + par[0] = tmp; + + /* Build the parallel of the registers recorded as saved for unwind. */ + for (j = 0; real_regs; ++j, real_regs &= real_regs - 1) + { + regno = ctz_hwi (real_regs); + reg = gen_rtx_REG (SImode, regno); + + tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4); + tmp = gen_frame_mem (SImode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + par[j + 1] = tmp; + } + + if (j == 0) + tmp = par[0]; + else + { + RTX_FRAME_RELATED_P (par[0]) = 1; + tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par)); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp); + + return insn; +} + +/* Emit code to push or pop registers to or from the stack. F is the + assembly file. MASK is the registers to pop. */ +static void +thumb_pop (FILE *f, unsigned long mask) +{ + int regno; + int lo_mask = mask & 0xFF; + int pushed_words = 0; + + gcc_assert (mask); + + if (lo_mask == 0 && (mask & (1 << PC_REGNUM))) + { + /* Special case. Do not generate a POP PC statement here, do it in + thumb_exit() */ + thumb_exit (f, -1); + return; + } + + fprintf (f, "\tpop\t{"); + + /* Look at the low registers first. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1) + { + if (lo_mask & 1) + { + asm_fprintf (f, "%r", regno); + + if ((lo_mask & ~1) != 0) + fprintf (f, ", "); + + pushed_words++; + } + } + + if (mask & (1 << PC_REGNUM)) + { + /* Catch popping the PC. */ + if (TARGET_INTERWORK || TARGET_BACKTRACE + || crtl->calls_eh_return) + { + /* The PC is never poped directly, instead + it is popped into r3 and then BX is used. */ + fprintf (f, "}\n"); + + thumb_exit (f, -1); + + return; + } + else + { + if (mask & 0xFF) + fprintf (f, ", "); + + asm_fprintf (f, "%r", PC_REGNUM); + } + } + + fprintf (f, "}\n"); +} + +/* Generate code to return from a thumb function. + If 'reg_containing_return_addr' is -1, then the return address is + actually on the stack, at the stack pointer. 
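[Editorial aside, not part of the patch: thumb1_emit_multi_reg_push above walks the register mask with the classic lowest-set-bit loop, clearing one bit per iteration with mask &= mask - 1 and reading its index with ctz_hwi. A self-contained restatement; collect_regs is an invented name and __builtin_ctzl stands in for ctz_hwi.]

  /* Collect the register numbers named by MASK, lowest first.
     Illustrative only.  */
  static int collect_regs (unsigned long mask, int regs[32])
  {
    int n = 0;
    for (; mask; mask &= mask - 1)
      regs[n++] = __builtin_ctzl (mask);
    return n;
  }

  /* For mask 0x90 (bits 4 and 7) this stores {4, 7} and returns 2.  */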
*/ +static void +thumb_exit (FILE *f, int reg_containing_return_addr) +{ + unsigned regs_available_for_popping; + unsigned regs_to_pop; + int pops_needed; + unsigned available; + unsigned required; + int mode; + int size; + int restore_a4 = FALSE; + + /* Compute the registers we need to pop. */ + regs_to_pop = 0; + pops_needed = 0; + + if (reg_containing_return_addr == -1) + { + regs_to_pop |= 1 << LR_REGNUM; + ++pops_needed; + } + + if (TARGET_BACKTRACE) + { + /* Restore the (ARM) frame pointer and stack pointer. */ + regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM); + pops_needed += 2; + } + + /* If there is nothing to pop then just emit the BX instruction and + return. */ + if (pops_needed == 0) + { + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); + return; + } + /* Otherwise if we are not supporting interworking and we have not created + a backtrace structure and the function was not entered in ARM mode then + just pop the return address straight into the PC. */ + else if (!TARGET_INTERWORK + && !TARGET_BACKTRACE + && !is_called_in_ARM_mode (current_function_decl) + && !crtl->calls_eh_return) + { + asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM); + return; + } + + /* Find out how many of the (return) argument registers we can corrupt. */ + regs_available_for_popping = 0; + + /* If returning via __builtin_eh_return, the bottom three registers + all contain information needed for the return. */ + if (crtl->calls_eh_return) + size = 12; + else + { + /* If we can deduce the registers used from the function's + return value. This is more reliable that examining + df_regs_ever_live_p () because that will be set if the register is + ever used in the function, not just if the register is used + to hold a return value. */ + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + size = GET_MODE_SIZE (mode); + + if (size == 0) + { + /* In a void function we can use any argument register. + In a function that returns a structure on the stack + we can use the second and third argument registers. */ + if (mode == VOIDmode) + regs_available_for_popping = + (1 << ARG_REGISTER (1)) + | (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + } + else if (size <= 4) + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else if (size <= 8) + regs_available_for_popping = + (1 << ARG_REGISTER (3)); + } + + /* Match registers to be popped with registers into which we pop them. */ + for (available = regs_available_for_popping, + required = regs_to_pop; + required != 0 && available != 0; + available &= ~(available & - available), + required &= ~(required & - required)) + -- pops_needed; + + /* If we have any popping registers left over, remove them. */ + if (available > 0) + regs_available_for_popping &= ~available; + + /* Otherwise if we need another popping register we can use + the fourth argument register. */ + else if (pops_needed) + { + /* If we have not found any free argument registers and + reg a4 contains the return address, we must move it. 
*/ + if (regs_available_for_popping == 0 + && reg_containing_return_addr == LAST_ARG_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + else if (size > 12) + { + /* Register a4 is being used to hold part of the return value, + but we have dire need of a free, low register. */ + restore_a4 = TRUE; + + asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM); + } + + if (reg_containing_return_addr != LAST_ARG_REGNUM) + { + /* The fourth argument register is available. */ + regs_available_for_popping |= 1 << LAST_ARG_REGNUM; + + --pops_needed; + } + } + + /* Pop as many registers as we can. */ + thumb_pop (f, regs_available_for_popping); + + /* Process the registers we popped. */ + if (reg_containing_return_addr == -1) + { + /* The return address was popped into the lowest numbered register. */ + regs_to_pop &= ~(1 << LR_REGNUM); + + reg_containing_return_addr = + number_of_first_bit_set (regs_available_for_popping); + + /* Remove this register for the mask of available registers, so that + the return address will not be corrupted by further pops. */ + regs_available_for_popping &= ~(1 << reg_containing_return_addr); + } + + /* If we popped other registers then handle them here. */ + if (regs_available_for_popping) + { + int frame_pointer; + + /* Work out which register currently contains the frame pointer. */ + frame_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the correct place. */ + asm_fprintf (f, "\tmov\t%r, %r\n", + ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer); + + /* (Temporarily) remove it from the mask of popped registers. */ + regs_available_for_popping &= ~(1 << frame_pointer); + regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM); + + if (regs_available_for_popping) + { + int stack_pointer; + + /* We popped the stack pointer as well, + find the register that contains it. */ + stack_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the stack register. */ + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer); + + /* At this point we have popped all necessary registers, so + do not worry about restoring regs_available_for_popping + to its correct value: + + assert (pops_needed == 0) + assert (regs_available_for_popping == (1 << frame_pointer)) + assert (regs_to_pop == (1 << STACK_POINTER)) */ + } + else + { + /* Since we have just move the popped value into the frame + pointer, the popping register is available for reuse, and + we know that we still have the stack pointer left to pop. */ + regs_available_for_popping |= (1 << frame_pointer); + } + } + + /* If we still have registers left on the stack, but we no longer have + any registers into which we can pop them, then we must move the return + address into the link register and make available the register that + contained it. */ + if (regs_available_for_popping == 0 && pops_needed > 0) + { + regs_available_for_popping |= 1 << reg_containing_return_addr; + + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, + reg_containing_return_addr); + + reg_containing_return_addr = LR_REGNUM; + } + + /* If we have registers left on the stack then pop some more. + We know that at most we will want to pop FP and SP. */ + if (pops_needed > 0) + { + int popped_into; + int move_to; + + thumb_pop (f, regs_available_for_popping); + + /* We have popped either FP or SP. + Move whichever one it is into the correct register. 
*/ + popped_into = number_of_first_bit_set (regs_available_for_popping); + move_to = number_of_first_bit_set (regs_to_pop); + + asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into); + + regs_to_pop &= ~(1 << move_to); + + --pops_needed; + } + + /* If we still have not popped everything then we must have only + had one register available to us and we are now popping the SP. */ + if (pops_needed > 0) + { + int popped_into; + + thumb_pop (f, regs_available_for_popping); + + popped_into = number_of_first_bit_set (regs_available_for_popping); + + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into); + /* + assert (regs_to_pop == (1 << STACK_POINTER)) + assert (pops_needed == 1) + */ + } + + /* If necessary restore the a4 register. */ + if (restore_a4) + { + if (reg_containing_return_addr != LR_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + + asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM); + } + + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + /* Return to caller. */ + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); +} + +/* Scan INSN just before assembler is output for it. + For Thumb-1, we track the status of the condition codes; this + information is used in the cbranchsi4_insn pattern. */ +void +thumb1_final_prescan_insn (rtx insn) +{ + if (flag_print_asm_name) + asm_fprintf (asm_out_file, "%@ 0x%04x\n", + INSN_ADDRESSES (INSN_UID (insn))); + /* Don't overwrite the previous setter when we get to a cbranch. */ + if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + { + enum attr_conds conds; + + if (cfun->machine->thumb1_cc_insn) + { + if (modified_in_p (cfun->machine->thumb1_cc_op0, insn) + || modified_in_p (cfun->machine->thumb1_cc_op1, insn)) + CC_STATUS_INIT; + } + conds = get_attr_conds (insn); + if (conds == CONDS_SET) + { + rtx set = single_set (insn); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = SET_DEST (set); + cfun->machine->thumb1_cc_op1 = const0_rtx; + cfun->machine->thumb1_cc_mode = CC_NOOVmode; + if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn) + { + rtx src1 = XEXP (SET_SRC (set), 1); + if (src1 == const0_rtx) + cfun->machine->thumb1_cc_mode = CCmode; + } + else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))) + { + /* Record the src register operand instead of dest because + cprop_hardreg pass propagates src. */ + cfun->machine->thumb1_cc_op0 = SET_SRC (set); + } + } + else if (conds != CONDS_NOCOND) + cfun->machine->thumb1_cc_insn = NULL_RTX; + } + + /* Check if unexpected far jump is used. */ + if (cfun->machine->lr_save_eliminated + && get_attr_far_jump (insn) == FAR_JUMP_YES) + internal_error("Unexpected thumb1 far jump"); +} + +int +thumb_shiftable_const (unsigned HOST_WIDE_INT val) +{ + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + val = val & (unsigned HOST_WIDE_INT)0xffffffffu; + if (val == 0) /* XXX */ + return 0; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + return 1; + + return 0; +} + +/* Returns nonzero if the current function contains, + or might contain a far jump. */ +static int +thumb_far_jump_used_p (void) +{ + rtx insn; + bool far_jump = false; + unsigned int func_size = 0; + + /* This test is only important for leaf functions. 
*/ + /* assert (!leaf_function_p ()); */ + + /* If we have already decided that far jumps may be used, + do not bother checking again, and always return true even if + it turns out that they are not being used. Once we have made + the decision that far jumps are present (and that hence the link + register will be pushed onto the stack) we cannot go back on it. */ + if (cfun->machine->far_jump_used) + return 1; + + /* If this function is not being called from the prologue/epilogue + generation code then it must be being called from the + INITIAL_ELIMINATION_OFFSET macro. */ + if (!(ARM_DOUBLEWORD_ALIGN || reload_completed)) + { + /* In this case we know that we are being asked about the elimination + of the arg pointer register. If that register is not being used, + then there are no arguments on the stack, and we do not have to + worry that a far jump might force the prologue to push the link + register, changing the stack offsets. In this case we can just + return false, since the presence of far jumps in the function will + not affect stack offsets. + + If the arg pointer is live (or if it was live, but has now been + eliminated and so set to dead) then we do have to test to see if + the function might contain a far jump. This test can lead to some + false negatives, since before reload is completed, then length of + branch instructions is not known, so gcc defaults to returning their + longest length, which in turn sets the far jump attribute to true. + + A false negative will not result in bad code being generated, but it + will result in a needless push and pop of the link register. We + hope that this does not occur too often. + + If we need doubleword stack alignment this could affect the other + elimination offsets so we can't risk getting it wrong. */ + if (df_regs_ever_live_p (ARG_POINTER_REGNUM)) + cfun->machine->arg_pointer_live = 1; + else if (!cfun->machine->arg_pointer_live) + return 0; + } + + /* We should not change far_jump_used during or after reload, as there is + no chance to change stack frame layout. */ + if (reload_in_progress || reload_completed) + return 0; + + /* Check to see if the function contains a branch + insn with the far jump attribute set. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES) + { + far_jump = true; + } + func_size += get_attr_length (insn); + } + + /* Attribute far_jump will always be true for thumb1 before + shorten_branch pass. So checking far_jump attribute before + shorten_branch isn't much useful. + + Following heuristic tries to estimate more accurately if a far jump + may finally be used. The heuristic is very conservative as there is + no chance to roll-back the decision of not to use far jump. + + Thumb1 long branch offset is -2048 to 2046. The worst case is each + 2-byte insn is associated with a 4 byte constant pool. Using + function size 2048/3 as the threshold is conservative enough. */ + if (far_jump) + { + if ((func_size * 3) >= 2048) + { + /* Record the fact that we have decided that + the function does use far jumps. */ + cfun->machine->far_jump_used = 1; + return 1; + } + } + + return 0; +} + +/* Return nonzero if FUNC must be entered in ARM mode. */ +int +is_called_in_ARM_mode (tree func) +{ + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + /* Ignore the problem about functions whose address is taken. 
*/ + if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) + return TRUE; + +#ifdef ARM_PE + return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; +#else + return FALSE; +#endif +} + +/* Given the stack offsets and register mask in OFFSETS, decide how + many additional registers to push instead of subtracting a constant + from SP. For epilogues the principle is the same except we use pop. + FOR_PROLOGUE indicates which we're generating. */ +static int +thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue) +{ + HOST_WIDE_INT amount; + unsigned long live_regs_mask = offsets->saved_regs_mask; + /* Extract a mask of the ones we can give to the Thumb's push/pop + instruction. */ + unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff); + /* Then count how many other high registers will need to be pushed. */ + unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + int n_free, reg_base, size; + + if (!for_prologue && frame_pointer_needed) + amount = offsets->locals_base - offsets->saved_regs; + else + amount = offsets->outgoing_args - offsets->saved_regs; + + /* If the stack frame size is 512 exactly, we can save one load + instruction, which should make this a win even when optimizing + for speed. */ + if (!optimize_size && amount != 512) + return 0; + + /* Can't do this if there are high registers to push. */ + if (high_regs_pushed != 0) + return 0; + + /* Shouldn't do it in the prologue if no registers would normally + be pushed at all. In the epilogue, also allow it if we'll have + a pop insn for the PC. */ + if (l_mask == 0 + && (for_prologue + || TARGET_BACKTRACE + || (live_regs_mask & 1 << LR_REGNUM) == 0 + || TARGET_INTERWORK + || crtl->args.pretend_args_size != 0)) + return 0; + + /* Don't do this if thumb_expand_prologue wants to emit instructions + between the push and the stack frame allocation. */ + if (for_prologue + && ((flag_pic && arm_pic_register != INVALID_REGNUM) + || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))) + return 0; + + reg_base = 0; + n_free = 0; + if (!for_prologue) + { + size = arm_size_return_regs (); + reg_base = ARM_NUM_INTS (size); + live_regs_mask >>= reg_base; + } + + while (reg_base + n_free < 8 && !(live_regs_mask & 1) + && (for_prologue || call_used_regs[reg_base + n_free])) + { + live_regs_mask >>= 1; + n_free++; + } + + if (n_free == 0) + return 0; + gcc_assert (amount / 4 * 4 == amount); + + if (amount >= 512 && (amount - n_free * 4) < 512) + return (amount - 508) / 4; + if (amount <= n_free * 4) + return amount / 4; + return 0; +} + +/* The bits which aren't usefully expanded as rtl. */ +const char * +thumb1_unexpanded_epilogue (void) +{ + arm_stack_offsets *offsets; + int regno; + unsigned long live_regs_mask = 0; + int high_regs_pushed = 0; + int extra_pop; + int had_to_push_lr; + int size; + + if (cfun->machine->return_used_this_function != 0) + return ""; + + if (IS_NAKED (arm_current_func_type ())) + return ""; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + /* If we can deduce the registers used from the function's return value. + This is more reliable that examining df_regs_ever_live_p () because that + will be set if the register is ever used in the function, not just if + the register is used to hold a return value. 
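+ For example, a function returning a 64-bit value has its result in r0-r1
+ (size 8), which leaves r2 and r3 free below for popping high registers.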
*/ + size = arm_size_return_regs (); + + extra_pop = thumb1_extra_regs_pushed (offsets, false); + if (extra_pop > 0) + { + unsigned long extra_mask = (1 << extra_pop) - 1; + live_regs_mask |= extra_mask << ARM_NUM_INTS (size); + } + + /* The prolog may have pushed some high registers to use as + work registers. e.g. the testsuite file: + gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c + compiles to produce: + push {r4, r5, r6, r7, lr} + mov r7, r9 + mov r6, r8 + push {r6, r7} + as part of the prolog. We have to undo that pushing here. */ + + if (high_regs_pushed) + { + unsigned long mask = live_regs_mask & 0xff; + int next_hi_reg; + + /* The available low registers depend on the size of the value we are + returning. */ + if (size <= 12) + mask |= 1 << 3; + if (size <= 8) + mask |= 1 << 2; + + if (mask == 0) + /* Oh dear! We have no low registers into which we can pop + high registers! */ + internal_error + ("no low registers available for popping high registers"); + + for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + while (high_regs_pushed) + { + /* Find lo register(s) into which the high register(s) can + be popped. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + high_regs_pushed--; + if (high_regs_pushed == 0) + break; + } + + mask &= (2 << regno) - 1; /* A noop if regno == 8 */ + + /* Pop the values into the low register(s). */ + thumb_pop (asm_out_file, mask); + + /* Move the value(s) into the high registers. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + { + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg, + regno); + + for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + } + } + live_regs_mask &= ~0x0f00; + } + + had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0; + live_regs_mask &= 0xff; + + if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE) + { + /* Pop the return address into the PC. */ + if (had_to_push_lr) + live_regs_mask |= 1 << PC_REGNUM; + + /* Either no argument registers were pushed or a backtrace + structure was created which includes an adjusted stack + pointer, so just pop everything. */ + if (live_regs_mask) + thumb_pop (asm_out_file, live_regs_mask); + + /* We have either just popped the return address into the + PC or it is was kept in LR for the entire function. + Note that thumb_pop has already called thumb_exit if the + PC was in the list. */ + if (!had_to_push_lr) + thumb_exit (asm_out_file, LR_REGNUM); + } + else + { + /* Pop everything but the return address. */ + if (live_regs_mask) + thumb_pop (asm_out_file, live_regs_mask); + + if (had_to_push_lr) + { + if (size > 12) + { + /* We have no free low regs, so save one. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM, + LAST_ARG_REGNUM); + } + + /* Get the return address into a temporary register. */ + thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM); + + if (size > 12) + { + /* Move the return address to lr. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM, + LAST_ARG_REGNUM); + /* Restore the low register. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, + IP_REGNUM); + regno = LR_REGNUM; + } + else + regno = LAST_ARG_REGNUM; + } + else + regno = LR_REGNUM; + + /* Remove the argument registers that were pushed onto the stack. 
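+ For example, with 8 bytes of pretend args this emits "add sp, sp, #8".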
*/ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n", + SP_REGNUM, SP_REGNUM, + crtl->args.pretend_args_size); + + thumb_exit (asm_out_file, regno); + } + + return ""; +} + +/* Functions to save and restore machine-specific function data. */ +static struct machine_function * +arm_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + +#if ARM_FT_UNKNOWN != 0 + machine->func_type = ARM_FT_UNKNOWN; +#endif + return machine; +} + +/* Return an RTX indicating where the return address to the + calling function can be found. */ +rtx +arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return NULL_RTX; + + return get_hard_reg_initial_val (Pmode, LR_REGNUM); +} + +/* Do anything needed before RTL is emitted for each function. */ +void +arm_init_expanders (void) +{ + /* Arrange to initialize and mark the machine per-function status. */ + init_machine_status = arm_init_machine_status; + + /* This is to stop the combine pass optimizing away the alignment + adjustment of va_arg. */ + /* ??? It is claimed that this should not be necessary. */ + if (cfun) + mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); +} + + +/* Like arm_compute_initial_elimination offset. Simpler because there + isn't an ABI specified frame pointer for Thumb. Instead, we set it + to point at the base of the local variables after static stack + space for a function has been allocated. */ + +HOST_WIDE_INT +thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->saved_args; + + case FRAME_POINTER_REGNUM: + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->saved_args; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->saved_args; + + default: + gcc_unreachable (); + } + break; + + case FRAME_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->soft_frame; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->soft_frame; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } +} + +/* Generate the function's prologue. */ + +void +thumb1_expand_prologue (void) +{ + rtx insn; + + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + unsigned long func_type; + int regno; + unsigned long live_regs_mask; + unsigned long l_mask; + unsigned high_regs_pushed = 0; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + if (IS_INTERRUPT (func_type)) + { + error ("interrupt Service Routines cannot be coded in Thumb mode"); + return; + } + + if (is_called_in_ARM_mode (current_function_decl)) + emit_insn (gen_prologue_thumb1_interwork ()); + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + /* Extract a mask of the ones we can give to the Thumb's push instruction. */ + l_mask = live_regs_mask & 0x40ff; + /* Then count how many other high registers will need to be pushed. 
*/ + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + if (crtl->args.pretend_args_size) + { + rtx x = GEN_INT (-crtl->args.pretend_args_size); + + if (cfun->machine->uses_anonymous_args) + { + int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); + unsigned long mask; + + mask = 1ul << (LAST_ARG_REGNUM + 1); + mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes); + + insn = thumb1_emit_multi_reg_push (mask, 0); + } + else + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, x)); + } + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (TARGET_BACKTRACE) + { + HOST_WIDE_INT offset = 0; + unsigned work_register; + rtx work_reg, x, arm_hfp_rtx; + + /* We have been asked to create a stack backtrace structure. + The code looks like this: + + 0 .align 2 + 0 func: + 0 sub SP, #16 Reserve space for 4 registers. + 2 push {R7} Push low registers. + 4 add R7, SP, #20 Get the stack pointer before the push. + 6 str R7, [SP, #8] Store the stack pointer + (before reserving the space). + 8 mov R7, PC Get hold of the start of this code + 12. + 10 str R7, [SP, #16] Store it. + 12 mov R7, FP Get hold of the current frame pointer. + 14 str R7, [SP, #4] Store it. + 16 mov R7, LR Get hold of the current return address. + 18 str R7, [SP, #12] Store it. + 20 add R7, SP, #16 Point at the start of the + backtrace structure. + 22 mov FP, R7 Put this value into the frame pointer. */ + + work_register = thumb_find_work_register (live_regs_mask); + work_reg = gen_rtx_REG (SImode, work_register); + arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (-16))); + RTX_FRAME_RELATED_P (insn) = 1; + + if (l_mask) + { + insn = thumb1_emit_multi_reg_push (l_mask, l_mask); + RTX_FRAME_RELATED_P (insn) = 1; + + offset = bit_count (l_mask) * UNITS_PER_WORD; + } + + x = GEN_INT (offset + 16 + crtl->args.pretend_args_size); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 4); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + /* Make sure that the instruction fetching the PC is in the right place + to calculate "start of backtrace creation code + 12". */ + /* ??? The stores using the common WORK_REG ought to be enough to + prevent the scheduler from doing anything weird. Failing that + we could always move all of the following into an UNSPEC_VOLATILE. 
*/ + if (l_mask) + { + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (Pmode, stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + else + { + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (Pmode, stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + + x = gen_rtx_REG (SImode, LR_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 8); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = GEN_INT (offset + 12); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + emit_move_insn (arm_hfp_rtx, work_reg); + } + /* Optimization: If we are not pushing any low registers but we are going + to push some high registers then delay our first push. This will just + be a push of LR and we can combine it with the push of the first high + register. */ + else if ((l_mask & 0xff) != 0 + || (high_regs_pushed == 0 && l_mask)) + { + unsigned long mask = l_mask; + mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; + insn = thumb1_emit_multi_reg_push (mask, mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (high_regs_pushed) + { + unsigned pushable_regs; + unsigned next_hi_reg; + unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn + : crtl->args.info.nregs; + unsigned arg_regs_mask = (1 << arg_regs_num) - 1; + + for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + /* Here we need to mask out registers used for passing arguments + even if they can be pushed. This is to avoid using them to stash the high + registers. Such kind of stash may clobber the use of arguments. */ + pushable_regs = l_mask & (~arg_regs_mask) & 0xff; + + if (pushable_regs == 0) + pushable_regs = 1 << thumb_find_work_register (live_regs_mask); + + while (high_regs_pushed > 0) + { + unsigned long real_regs_mask = 0; + + for (regno = LAST_LO_REGNUM; regno >= 0; regno --) + { + if (pushable_regs & (1 << regno)) + { + emit_move_insn (gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, next_hi_reg)); + + high_regs_pushed --; + real_regs_mask |= (1 << next_hi_reg); + + if (high_regs_pushed) + { + for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; + next_hi_reg --) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + else + { + pushable_regs &= ~((1 << regno) - 1); + break; + } + } + } + + /* If we had to find a work register and we have not yet + saved the LR then add it to the list of regs to push. */ + if (l_mask == (1 << LR_REGNUM)) + { + pushable_regs |= l_mask; + real_regs_mask |= l_mask; + l_mask = 0; + } + + insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* Load the pic register before setting the frame pointer, + so we can use r7 as a temporary work register. 
*/ + if (flag_pic && arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (live_regs_mask); + + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), + stack_pointer_rtx); + + if (flag_stack_usage_info) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + amount = offsets->outgoing_args - offsets->saved_regs; + amount -= 4 * thumb1_extra_regs_pushed (offsets, true); + if (amount) + { + if (amount < 512) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- amount))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx reg, dwarf; + + /* The stack decrement is too big for an immediate value in a single + insn. In theory we could issue multiple subtracts, but after + three of them it becomes more space efficient to place the full + value in the constant pool and load into a register. (Also the + ARM debugger really likes to see only one stack decrement per + function). So instead we look for a scratch register into which + we can load the decrement, and then we subtract this from the + stack pointer. Unfortunately on the thumb the only available + scratch registers are the argument registers, and we cannot use + these as they may hold arguments to the function. Instead we + attempt to locate a call preserved register which is used by this + function. If we can find one, then we know that it will have + been pushed at the start of the prologue and so we can corrupt + it now. */ + for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++) + if (live_regs_mask & (1 << regno)) + break; + + gcc_assert(regno <= LAST_LO_REGNUM); + + reg = gen_rtx_REG (SImode, regno); + + emit_insn (gen_movsi (reg, GEN_INT (- amount))); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, reg)); + + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -amount)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (frame_pointer_needed) + thumb_set_frame_pointer (offsets); + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); + if (live_regs_mask & 0xff) + cfun->machine->lr_save_eliminated = 0; +} + +/* Generate pattern *pop_multiple_with_stack_update_and_return if single + POP instruction can be generated. LR should be replaced by PC. All + the checks required are already done by USE_RETURN_INSN (). Hence, + all we really need to check here is if single register is to be + returned, or multiple register return. 
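+ Roughly: a single saved register becomes a post-increment load of the PC,
+ while several saved registers become one POP whose register list ends in
+ PC in place of LR.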
*/ +void +thumb2_expand_return (bool simple_return) +{ + int i, num_regs; + unsigned long saved_regs_mask; + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + if (!simple_return && saved_regs_mask) + { + if (num_regs == 1) + { + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + rtx reg = gen_rtx_REG (SImode, PC_REGNUM); + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + XVECEXP (par, 0, 0) = ret_rtx; + XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr); + RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1; + emit_jump_insn (par); + } + else + { + saved_regs_mask &= ~ (1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + arm_emit_multi_reg_pop (saved_regs_mask); + } + } + else + { + emit_jump_insn (simple_return_rtx); + } +} + +void +thumb1_expand_epilogue (void) +{ + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + int regno; + + /* Naked functions don't have prologues. */ + if (IS_NAKED (arm_current_func_type ())) + return; + + offsets = arm_get_frame_offsets (); + amount = offsets->outgoing_args - offsets->saved_regs; + + if (frame_pointer_needed) + { + emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + amount = offsets->locals_base - offsets->saved_regs; + } + amount -= 4 * thumb1_extra_regs_pushed (offsets, false); + + gcc_assert (amount >= 0); + if (amount) + { + emit_insn (gen_blockage ()); + + if (amount < 512) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (amount))); + else + { + /* r3 is always free in the epilogue. */ + rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); + + emit_insn (gen_movsi (reg, GEN_INT (amount))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg)); + } + } + + /* Emit a USE (stack_pointer_rtx), so that + the stack adjustment will not be deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + + if (crtl->profile || !TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + + /* Emit a clobber for each insn that will be restored in the epilogue, + so that flow2 will get register lifetimes correct. */ + for (regno = 0; regno < 13; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + emit_clobber (gen_rtx_REG (SImode, regno)); + + if (! df_regs_ever_live_p (LR_REGNUM)) + emit_use (gen_rtx_REG (SImode, LR_REGNUM)); +} + +/* Epilogue code for APCS frame. */ +static void +arm_expand_epilogue_apcs_frame (bool really_return) +{ + unsigned long func_type; + unsigned long saved_regs_mask; + int num_regs = 0; + int i; + int floats_from_frame = 0; + arm_stack_offsets *offsets; + + gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM); + func_type = arm_current_func_type (); + + /* Get frame offsets for ARM. */ + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + /* Find the offset of the floating-point save area in the frame. */ + floats_from_frame + = (offsets->saved_args + + arm_compute_static_chain_stack_bytes () + - offsets->frame); + + /* Compute how many core registers saved and how far away the floats are. 
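+ (Each saved core register contributes 4 bytes to the distance between the
+ frame pointer and the start of the floating-point save area.)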
*/ + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + num_regs++; + floats_from_frame += 4; + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + int start_reg; + rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + + /* The offset is from IP_REGNUM. */ + int saved_size = arm_get_vfp_saved_size (); + if (saved_size > 0) + { + rtx insn; + floats_from_frame += saved_size; + insn = emit_insn (gen_addsi3 (ip_rtx, + hard_frame_pointer_rtx, + GEN_INT (-floats_from_frame))); + arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame, + ip_rtx, hard_frame_pointer_rtx); + } + + /* Generate VFP register multi-pop. */ + start_reg = FIRST_VFP_REGNUM; + + for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2) + /* Look for a case where a reg does not need restoring. */ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + (i - start_reg) / 2, + gen_rtx_REG (SImode, + IP_REGNUM)); + start_reg = i + 2; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + (i - start_reg) / 2, + gen_rtx_REG (SImode, IP_REGNUM)); + } + + if (TARGET_IWMMXT) + { + /* The frame pointer is guaranteed to be non-double-word aligned, as + it is set to double-word-aligned old_stack_pointer - 4. */ + rtx insn; + int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1); + + for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx addr = gen_frame_mem (V2SImode, + plus_constant (Pmode, hard_frame_pointer_rtx, + - lrm_count * 4)); + insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (V2SImode, i), + NULL_RTX); + lrm_count += 2; + } + } + + /* saved_regs_mask should contain IP which contains old stack pointer + at the time of activation creation. Since SP and IP are adjacent registers, + we can restore the value directly into SP. */ + gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); + saved_regs_mask &= ~(1 << IP_REGNUM); + saved_regs_mask |= (1 << SP_REGNUM); + + /* There are two registers left in saved_regs_mask - LR and PC. We + only need to restore LR (the return address), but to + save time we can load it directly into PC, unless we need a + special function exit sequence, or we are not really returning. */ + if (really_return + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return) + /* Delete LR from the register mask, so that LR on + the stack is loaded into the PC in the register mask. */ + saved_regs_mask &= ~(1 << LR_REGNUM); + else + saved_regs_mask &= ~(1 << PC_REGNUM); + + num_regs = bit_count (saved_regs_mask); + if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca) + { + rtx insn; + emit_insn (gen_blockage ()); + /* Unwind the stack to just below the saved registers. */ + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- 4 * num_regs))); + + arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs, + stack_pointer_rtx, hard_frame_pointer_rtx); + } + + arm_emit_multi_reg_pop (saved_regs_mask); + + if (IS_INTERRUPT (func_type)) + { + /* Interrupt handlers will have pushed the + IP onto the stack, so restore it now. 
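+ (This is a post-increment load, effectively "ldr ip, [sp], #4".)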
*/ + rtx insn; + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, IP_REGNUM), + NULL_RTX); + } + + if (!really_return || (saved_regs_mask & (1 << PC_REGNUM))) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM))); + + if (IS_STACKALIGN (func_type)) + /* Restore the original stack pointer. Before prologue, the stack was + realigned and the original stack pointer saved in r0. For details, + see comment in arm_expand_prologue. */ + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + + emit_jump_insn (simple_return_rtx); +} + +/* Generate RTL to represent ARM epilogue. Really_return is true if the + function is not a sibcall. */ +void +arm_expand_epilogue (bool really_return) +{ + unsigned long func_type; + unsigned long saved_regs_mask; + int num_regs = 0; + int i; + int amount; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have epilogue. Hence, generate return pattern, and + let output_return_instruction take care of instruction emission if any. */ + if (IS_NAKED (func_type) + || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)) + { + if (really_return) + emit_jump_insn (simple_return_rtx); + return; + } + + /* If we are throwing an exception, then we really must be doing a + return, so we can't tail-call. */ + gcc_assert (!crtl->calls_eh_return || really_return); + + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + arm_expand_epilogue_apcs_frame (really_return); + return; + } + + /* Get frame offsets for ARM. */ + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + num_regs = bit_count (saved_regs_mask); + + if (frame_pointer_needed) + { + rtx insn; + /* Restore stack pointer if necessary. */ + if (TARGET_ARM) + { + /* In ARM mode, frame pointer points to first saved register. + Restore stack pointer to last saved register. */ + amount = offsets->frame - offsets->saved_regs; + + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + stack_pointer_rtx, + hard_frame_pointer_rtx); + + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not + deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + else + { + /* In Thumb-2 mode, the frame pointer points to the last saved + register. */ + amount = offsets->locals_base - offsets->saved_regs; + if (amount) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + hard_frame_pointer_rtx, + hard_frame_pointer_rtx); + } + + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + insn = emit_insn (gen_movsi (stack_pointer_rtx, + hard_frame_pointer_rtx)); + arm_add_cfa_adjust_cfa_note (insn, 0, + stack_pointer_rtx, + hard_frame_pointer_rtx); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not + deleted. 
*/ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + } + else + { + /* Pop off outgoing args and local frame to adjust stack pointer to + last saved register. */ + amount = offsets->outgoing_args - offsets->saved_regs; + if (amount) + { + rtx tmp; + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (tmp, amount, + stack_pointer_rtx, stack_pointer_rtx); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is + not deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + /* Generate VFP register multi-pop. */ + int end_reg = LAST_VFP_REGNUM + 1; + + /* Scan the registers in reverse order. We need to match + any groupings made in the prologue and generate matching + vldm operations. The need to match groups is because, + unlike pop, vldm can only do consecutive regs. */ + for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2) + /* Look for a case where a reg does not need restoring. */ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + /* Restore the regs discovered so far (from reg+2 to + end_reg). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + end_reg = i; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + } + + if (TARGET_IWMMXT) + for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx insn; + rtx addr = gen_rtx_MEM (V2SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (V2SImode, i), + NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } + + if (saved_regs_mask) + { + rtx insn; + bool return_in_pc = false; + + if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED + && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) + && !IS_STACKALIGN (func_type) + && really_return + && crtl->args.pretend_args_size == 0 + && saved_regs_mask & (1 << LR_REGNUM) + && !crtl->calls_eh_return) + { + saved_regs_mask &= ~(1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + return_in_pc = true; + } + + if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc)) + { + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + + if (i == PC_REGNUM) + { + insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (insn, 0, 0) = ret_rtx; + XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, i), + addr); + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1; + insn = emit_jump_insn (insn); + } + else + { + insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i), + addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, i), + NULL_RTX); + 
arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, + stack_pointer_rtx); + } + } + } + else + { + if (TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (TARGET_THUMB2) + thumb2_emit_ldrd_pop (saved_regs_mask); + else if (TARGET_ARM && !IS_INTERRUPT (func_type)) + arm_emit_ldrd_pop (saved_regs_mask); + else + arm_emit_multi_reg_pop (saved_regs_mask); + } + else + arm_emit_multi_reg_pop (saved_regs_mask); + } + + if (return_in_pc == true) + return; + } + + if (crtl->args.pretend_args_size) + { + int i, j; + rtx dwarf = NULL_RTX; + rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + + RTX_FRAME_RELATED_P (tmp) = 1; + + if (cfun->machine->uses_anonymous_args) + { + /* Restore pretend args. Refer arm_expand_prologue on how to save + pretend_args in stack. */ + int num_regs = crtl->args.pretend_args_size / 4; + saved_regs_mask = (0xf0 >> num_regs) & 0xf; + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + rtx reg = gen_rtx_REG (SImode, i); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + j++; + } + REG_NOTES (tmp) = dwarf; + } + arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size, + stack_pointer_rtx, stack_pointer_rtx); + } + + if (!really_return) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM))); + + if (IS_STACKALIGN (func_type)) + /* Restore the original stack pointer. Before prologue, the stack was + realigned and the original stack pointer saved in r0. For details, + see comment in arm_expand_prologue. */ + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + + emit_jump_insn (simple_return_rtx); +} + +/* Implementation of insn prologue_thumb1_interwork. This is the first + "instruction" of a function called in ARM mode. Swap to thumb mode. */ + +const char * +thumb1_output_interwork (void) +{ + const char * name; + FILE *f = asm_out_file; + + gcc_assert (MEM_P (DECL_RTL (current_function_decl))); + gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) + == SYMBOL_REF); + name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + + /* Generate code sequence to switch us into Thumb mode. */ + /* The .code 32 directive has already been emitted by + ASM_DECLARE_FUNCTION_NAME. */ + asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); + + /* Generate a label, so that the debugger will notice the + change in instruction sets. This label is also used by + the assembler to bypass the ARM code when this function + is called from a Thumb encoded function elsewhere in the + same file. Hence the definition of STUB_NAME here must + agree with the definition in gas/config/tc-arm.c. */ + +#define STUB_NAME ".real_start_of" + + fprintf (f, "\t.code\t16\n"); +#ifdef ARM_PE + if (arm_dllexport_name_p (name)) + name = arm_strip_name_encoding (name); +#endif + asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); + fprintf (f, "\t.thumb_func\n"); + asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); + + return ""; +} + +/* Handle the case of a double word load into a low register from + a computed memory address. The computed address may involve a + register which is overwritten by the load. 
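+ For example, loading a doubleword into r0/r1 from the address held in r0:
+ the high word must be fetched from [r0, #4] into r1 before the low word
+ load overwrites r0.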
*/ +const char * +thumb_load_double_from_address (rtx *operands) +{ + rtx addr; + rtx base; + rtx offset; + rtx arg1; + rtx arg2; + + gcc_assert (REG_P (operands[0])); + gcc_assert (MEM_P (operands[1])); + + /* Get the memory address. */ + addr = XEXP (operands[1], 0); + + /* Work out how the memory address is computed. */ + switch (GET_CODE (addr)) + { + case REG: + operands[2] = adjust_address (operands[1], SImode, 4); + + if (REGNO (operands[0]) == REGNO (addr)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + break; + + case CONST: + /* Compute
<address>
+ 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + break; + + case PLUS: + arg1 = XEXP (addr, 0); + arg2 = XEXP (addr, 1); + + if (CONSTANT_P (arg1)) + base = arg2, offset = arg1; + else + base = arg1, offset = arg2; + + gcc_assert (REG_P (base)); + + /* Catch the case of
= + */ + if (REG_P (offset)) + { + int reg_offset = REGNO (offset); + int reg_base = REGNO (base); + int reg_dest = REGNO (operands[0]); + + /* Add the base and offset registers together into the + higher destination register. */ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r", + reg_dest + 1, reg_base, reg_offset); + + /* Load the lower destination register from the address in + the higher destination register. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]", + reg_dest, reg_dest + 1); + + /* Load the higher destination register from its own address + plus 4. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]", + reg_dest + 1, reg_dest + 1); + } + else + { + /* Compute
<address>
+ 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + /* If the computed address is held in the low order register + then load the high order register first, otherwise always + load the low order register first. */ + if (REGNO (operands[0]) == REGNO (base)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + } + break; + + case LABEL_REF: + /* With no registers to worry about we can just load the value + directly. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +const char * +thumb_output_move_mem_multiple (int n, rtx *operands) +{ + rtx tmp; + + switch (n) + { + case 2: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + output_asm_insn ("ldmia\t%1!, {%4, %5}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5}", operands); + break; + + case 3: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + if (REGNO (operands[5]) > REGNO (operands[6])) + { + tmp = operands[5]; + operands[5] = operands[6]; + operands[6] = tmp; + } + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + + output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +/* Output a call-via instruction for thumb state. */ +const char * +thumb_call_via_reg (rtx reg) +{ + int regno = REGNO (reg); + rtx *labelp; + + gcc_assert (regno < LR_REGNUM); + + /* If we are in the normal text section we can use a single instance + per compilation unit. If we are doing function sections, then we need + an entry per section, since we can't rely on reachability. */ + if (in_section == text_section) + { + thumb_call_reg_needed = 1; + + if (thumb_call_via_label[regno] == NULL) + thumb_call_via_label[regno] = gen_label_rtx (); + labelp = thumb_call_via_label + regno; + } + else + { + if (cfun->machine->call_via[regno] == NULL) + cfun->machine->call_via[regno] = gen_label_rtx (); + labelp = cfun->machine->call_via + regno; + } + + output_asm_insn ("bl\t%a0", labelp); + return ""; +} + +/* Routines for generating rtl. 
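+ For example, thumb_expand_movmemqi below expands a 23-byte copy as a
+ 12-byte block move, an 8-byte block move, then a halfword and a byte
+ transfer.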
*/ +void +thumb_expand_movmemqi (rtx *operands) +{ + rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); + rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + HOST_WIDE_INT len = INTVAL (operands[2]); + HOST_WIDE_INT offset = 0; + + while (len >= 12) + { + emit_insn (gen_movmem12b (out, in, out, in)); + len -= 12; + } + + if (len >= 8) + { + emit_insn (gen_movmem8b (out, in, out, in)); + len -= 8; + } + + if (len >= 4) + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in))); + emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg)); + len -= 4; + offset += 4; + } + + if (len >= 2) + { + rtx reg = gen_reg_rtx (HImode); + emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode, + plus_constant (Pmode, in, + offset)))); + emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out, + offset)), + reg)); + len -= 2; + offset += 2; + } + + if (len) + { + rtx reg = gen_reg_rtx (QImode); + emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode, + plus_constant (Pmode, in, + offset)))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out, + offset)), + reg)); + } +} + +void +thumb_reload_out_hi (rtx *operands) +{ + emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2])); +} + +/* Handle reading a half-word from memory during reload. */ +void +thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} + +/* Return the length of a function name prefix + that starts with the character 'c'. */ +static int +arm_get_strip_length (int c) +{ + switch (c) + { + ARM_NAME_ENCODING_LENGTHS + default: return 0; + } +} + +/* Return a pointer to a function's name with any + and all prefix encodings stripped from it. */ +const char * +arm_strip_name_encoding (const char *name) +{ + int skip; + + while ((skip = arm_get_strip_length (* name))) + name += skip; + + return name; +} + +/* If there is a '*' anywhere in the name's prefix, then + emit the stripped name verbatim, otherwise prepend an + underscore if leading underscores are being used. */ +void +arm_asm_output_labelref (FILE *stream, const char *name) +{ + int skip; + int verbatim = 0; + + while ((skip = arm_get_strip_length (* name))) + { + verbatim |= (*name == '*'); + name += skip; + } + + if (verbatim) + fputs (name, stream); + else + asm_fprintf (stream, "%U%s", name); +} + +/* This function is used to emit an EABI tag and its associated value. + We emit the numerical value of the tag in case the assembler does not + support textual tags. (Eg gas prior to 2.20). If requested we include + the tag name in a comment so that anyone reading the assembler output + will know which tag is being set. + + This function is not static because arm-c.c needs it too. */ + +void +arm_emit_eabi_attribute (const char *name, int num, int val) +{ + asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val); + if (flag_verbose_asm || flag_debug_asm) + asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name); + asm_fprintf (asm_out_file, "\n"); +} + +static void +arm_file_start (void) +{ + int val; + + if (TARGET_UNIFIED_ASM) + asm_fprintf (asm_out_file, "\t.syntax unified\n"); + + if (TARGET_BPABI) + { + const char *fpu_name; + if (arm_selected_arch) + { + /* armv7ve doesn't support any extensions. */ + if (strcmp (arm_selected_arch->name, "armv7ve") == 0) + { + /* Keep backward compatability for assemblers + which don't support armv7ve. 
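+ armv7ve is therefore spelled out as armv7-a plus the virt, idiv, sec and
+ mp architecture extensions.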
*/ + asm_fprintf (asm_out_file, "\t.arch armv7-a\n"); + asm_fprintf (asm_out_file, "\t.arch_extension virt\n"); + asm_fprintf (asm_out_file, "\t.arch_extension idiv\n"); + asm_fprintf (asm_out_file, "\t.arch_extension sec\n"); + asm_fprintf (asm_out_file, "\t.arch_extension mp\n"); + } + else + { + const char* pos = strchr (arm_selected_arch->name, '+'); + if (pos) + { + char buf[15]; + gcc_assert (strlen (arm_selected_arch->name) + <= sizeof (buf) / sizeof (*pos)); + strncpy (buf, arm_selected_arch->name, + (pos - arm_selected_arch->name) * sizeof (*pos)); + buf[pos - arm_selected_arch->name] = '\0'; + asm_fprintf (asm_out_file, "\t.arch %s\n", buf); + asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1); + } + else + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); + } + } + else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); + else + { + const char* truncated_name + = arm_rewrite_selected_cpu (arm_selected_cpu->name); + asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name); + } + + if (TARGET_SOFT_FLOAT) + { + fpu_name = "softvfp"; + } + else + { + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) + { + if (TARGET_HARD_FLOAT) + arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3); + if (TARGET_HARD_FLOAT_ABI) + arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1); + } + } + asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name); + + /* Some of these attributes only apply when the corresponding features + are used. However we don't have any easy way of figuring this out. + Conservatively record the setting that would have been used. */ + + if (flag_rounding_math) + arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1); + + if (!flag_unsafe_math_optimizations) + { + arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1); + arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1); + } + if (flag_signaling_nans) + arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1); + + arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23, + flag_finite_math_only ? 1 : 3); + + arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1); + arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1); + arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, + flag_short_enums ? 1 : 2); + + /* Tag_ABI_optimization_goals. */ + if (optimize_size) + val = 4; + else if (optimize >= 2) + val = 2; + else if (optimize) + val = 1; + else + val = 6; + arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val); + + arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34, + unaligned_access); + + if (arm_fp16_format) + arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38, + (int) arm_fp16_format); + + if (arm_lang_output_object_attributes_hook) + arm_lang_output_object_attributes_hook(); + } + + default_file_start (); +} + +static void +arm_file_end (void) +{ + int regno; + + if (NEED_INDICATE_EXEC_STACK) + /* Add .note.GNU-stack. */ + file_end_indicate_exec_stack (); + + if (! 
thumb_call_reg_needed) + return; + + switch_to_section (text_section); + asm_fprintf (asm_out_file, "\t.code 16\n"); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = thumb_call_via_label[regno]; + + if (label != 0) + { + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } +} + +#ifndef ARM_PE +/* Symbols in the text segment can be accessed without indirecting via the + constant pool; it may take an extra binary operation, but this is still + faster than indirecting via memory. Don't do this when not optimizing, + since we won't be calculating al of the offsets necessary to do this + simplification. */ + +static void +arm_encode_section_info (tree decl, rtx rtl, int first) +{ + if (optimize > 0 && TREE_CONSTANT (decl)) + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + + default_encode_section_info (decl, rtl, first); +} +#endif /* !ARM_PE */ + +static void +arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno + && !strcmp (prefix, "L")) + { + arm_ccfsm_state = 0; + arm_target_insn = NULL; + } + default_internal_label (stream, prefix, labelno); +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree function) +{ + static int thunk_label = 0; + char label[256]; + char labelpc[256]; + int mi_delta = delta; + const char *const mi_op = mi_delta < 0 ? "sub" : "add"; + int shift = 0; + int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) + ? 1 : 0); + if (mi_delta < 0) + mi_delta = - mi_delta; + + final_start_function (emit_barrier (), file, 1); + + if (TARGET_THUMB1) + { + int labelno = thunk_label++; + ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); + /* Thunks are entered in arm mode when avaiable. */ + if (TARGET_THUMB1_ONLY) + { + /* push r3 so we can use it as a temporary. */ + /* TODO: Omit this save if r3 is not used. */ + fputs ("\tpush {r3}\n", file); + fputs ("\tldr\tr3, ", file); + } + else + { + fputs ("\tldr\tr12, ", file); + } + assemble_name (file, label); + fputc ('\n', file); + if (flag_pic) + { + /* If we are generating PIC, the ldr instruction below loads + "(target - 7) - .LTHUNKPCn" into r12. The pc reads as + the address of the add + 8, so we have: + + r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) + = target + 1. + + Note that we have "+ 1" because some versions of GNU ld + don't set the low bit of the result for R_ARM_REL32 + relocations against thumb function symbols. + On ARMv6M this is +4, not +8. */ + ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); + assemble_name (file, labelpc); + fputs (":\n", file); + if (TARGET_THUMB1_ONLY) + { + /* This is 2 insns after the start of the thunk, so we know it + is 4-byte aligned. 
*/ + fputs ("\tadd\tr3, pc, r3\n", file); + fputs ("\tmov r12, r3\n", file); + } + else + fputs ("\tadd\tr12, pc, r12\n", file); + } + else if (TARGET_THUMB1_ONLY) + fputs ("\tmov r12, r3\n", file); + } + if (TARGET_THUMB1_ONLY) + { + if (mi_delta > 255) + { + fputs ("\tldr\tr3, ", file); + assemble_name (file, label); + fputs ("+4\n", file); + asm_fprintf (file, "\t%s\t%r, %r, r3\n", + mi_op, this_regno, this_regno); + } + else if (mi_delta != 0) + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta); + } + } + else + { + /* TODO: Use movw/movt for large constants when available. */ + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } + } + if (TARGET_THUMB1) + { + if (TARGET_THUMB1_ONLY) + fputs ("\tpop\t{r3}\n", file); + + fprintf (file, "\tbx\tr12\n"); + ASM_OUTPUT_ALIGN (file, 2); + assemble_name (file, label); + fputs (":\n", file); + if (flag_pic) + { + /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + rtx tem = XEXP (DECL_RTL (function), 0); + tem = plus_constant (GET_MODE (tem), tem, -7); + tem = gen_rtx_MINUS (GET_MODE (tem), + tem, + gen_rtx_SYMBOL_REF (Pmode, + ggc_strdup (labelpc))); + assemble_integer (tem, 4, BITS_PER_WORD, 1); + } + else + /* Output ".word .LTHUNKn". */ + assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); + + if (TARGET_THUMB1_ONLY && mi_delta > 255) + assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + if (NEED_PLT_RELOC) + fputs ("(PLT)", file); + fputc ('\n', file); + } + + final_end_function (); +} + +int +arm_emit_vector_const (FILE *file, rtx x) +{ + int i; + const char * pattern; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + switch (GET_MODE (x)) + { + case V2SImode: pattern = "%08x"; break; + case V4HImode: pattern = "%04x"; break; + case V8QImode: pattern = "%02x"; break; + default: gcc_unreachable (); + } + + fprintf (file, "0x"); + for (i = CONST_VECTOR_NUNITS (x); i--;) + { + rtx element; + + element = CONST_VECTOR_ELT (x, i); + fprintf (file, pattern, INTVAL (element)); + } + + return 1; +} + +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. */ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + +const char * +arm_output_load_gr (rtx *operands) +{ + rtx reg; + rtx offset; + rtx wcgr; + rtx sum; + + if (!MEM_P (operands [1]) + || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS + || !REG_P (reg = XEXP (sum, 0)) + || !CONST_INT_P (offset = XEXP (sum, 1)) + || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024))) + return "wldrw%?\t%0, %1"; + + /* Fix up an out-of-range load of a GR register. 
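+ In outline: the base register of the address is saved with a pre-decrement str, re-used to hold the value fetched by a plain ldr, copied into the wCGR register with tmcr, and then restored with a post-increment ldr.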
*/ + output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg); + wcgr = operands[0]; + operands[0] = reg; + output_asm_insn ("ldr%?\t%0, %1", operands); + + operands[0] = wcgr; + operands[1] = reg; + output_asm_insn ("tmcr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg); + + return ""; +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. + + On the ARM, PRETEND_SIZE is set in order to have the prologue push the last + named arg and all anonymous args onto the stack. + XXX I know the prologue shouldn't be pushing registers, but it is faster + that way. */ + +static void +arm_setup_incoming_varargs (cumulative_args_t pcum_v, + enum machine_mode mode, + tree type, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs; + + cfun->machine->uses_anonymous_args = 1; + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + nregs = pcum->aapcs_ncrn; + if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) + nregs++; + } + else + nregs = pcum->nregs; + + if (nregs < NUM_ARG_REGS) + *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; +} + +/* We can't rely on the caller doing the proper promotion when + using APCS or ATPCS. */ + +static bool +arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) +{ + return !TARGET_AAPCS_BASED; +} + +static enum machine_mode +arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + return SImode; + + return mode; +} + +/* AAPCS based ABIs use short enums by default. */ + +static bool +arm_default_short_enums (void) +{ + return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX; +} + + +/* AAPCS requires that anonymous bitfields affect structure alignment. */ + +static bool +arm_align_anon_bitfield (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */ + +static tree +arm_cxx_guard_type (void) +{ + return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; +} + + +/* The EABI says test the least significant bit of a guard variable. */ + +static bool +arm_cxx_guard_mask_bit (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI specifies that all array cookies are 8 bytes long. */ + +static tree +arm_get_cookie_size (tree type) +{ + tree size; + + if (!TARGET_AAPCS_BASED) + return default_cxx_get_cookie_size (type); + + size = build_int_cst (sizetype, 8); + return size; +} + + +/* The EABI says that array cookies should also contain the element size. */ + +static bool +arm_cookie_has_size (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI says constructors and destructors should return a pointer to + the object constructed/destroyed. */ + +static bool +arm_cxx_cdtor_returns_this (void) +{ + return TARGET_AAPCS_BASED; +} + +/* The EABI says that an inline function may never be the key + method. */ + +static bool +arm_cxx_key_method_may_be_inline (void) +{ + return !TARGET_AAPCS_BASED; +} + +static void +arm_cxx_determine_class_data_visibility (tree decl) +{ + if (!TARGET_AAPCS_BASED + || !TARGET_DLLIMPORT_DECL_ATTRIBUTES) + return; + + /* In general, \S 3.2.5.5 of the ARM EABI requires that class data + is exported. 
However, on systems without dynamic vague linkage, + \S 3.2.5.6 says that COMDAT class data has hidden linkage. */ + if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl)) + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + else + DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT; + DECL_VISIBILITY_SPECIFIED (decl) = 1; +} + +static bool +arm_cxx_class_data_always_comdat (void) +{ + /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have + vague linkage if the class has no key function. */ + return !TARGET_AAPCS_BASED; +} + + +/* The EABI says __aeabi_atexit should be used to register static + destructors. */ + +static bool +arm_cxx_use_aeabi_atexit (void) +{ + return TARGET_AAPCS_BASED; +} + + +void +arm_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + rtx addr; + unsigned long saved_regs; + + offsets = arm_get_frame_offsets (); + saved_regs = offsets->saved_regs_mask; + + if ((saved_regs & (1 << LR_REGNUM)) == 0) + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); + else + { + if (frame_pointer_needed) + addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4); + else + { + /* LR will be the first saved register. */ + delta = offsets->outgoing_args - (offsets->frame + 4); + + + if (delta >= 4096) + { + emit_insn (gen_addsi3 (scratch, stack_pointer_rtx, + GEN_INT (delta & ~4095))); + addr = scratch; + delta &= 4095; + } + else + addr = stack_pointer_rtx; + + addr = plus_constant (Pmode, addr, delta); + } + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } +} + + +void +thumb_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + HOST_WIDE_INT limit; + int reg; + rtx addr; + unsigned long mask; + + emit_use (source); + + offsets = arm_get_frame_offsets (); + mask = offsets->saved_regs_mask; + if (mask & (1 << LR_REGNUM)) + { + limit = 1024; + /* Find the saved regs. */ + if (frame_pointer_needed) + { + delta = offsets->soft_frame - offsets->saved_args; + reg = THUMB_HARD_FRAME_POINTER_REGNUM; + if (TARGET_THUMB1) + limit = 128; + } + else + { + delta = offsets->outgoing_args - offsets->saved_args; + reg = SP_REGNUM; + } + /* Allow for the stack frame. */ + if (TARGET_THUMB1 && TARGET_BACKTRACE) + delta -= 16; + /* The link register is always the first saved register. */ + delta -= 4; + + /* Construct the address. */ + addr = gen_rtx_REG (SImode, reg); + if (delta > limit) + { + emit_insn (gen_movsi (scratch, GEN_INT (delta))); + emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx)); + addr = scratch; + } + else + addr = plus_constant (Pmode, addr, delta); + + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } + else + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); +} + +/* Implements target hook vector_mode_supported_p. */ +bool +arm_vector_mode_supported_p (enum machine_mode mode) +{ + /* Neon also supports V2SImode, etc. listed in the clause below. */ + if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode + || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + return true; + + if ((TARGET_NEON || TARGET_IWMMXT) + && ((mode == V2SImode) + || (mode == V4HImode) + || (mode == V8QImode))) + return true; + + if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode + || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode + || mode == V2HAmode)) + return true; + + return false; +} + +/* Implements target hook array_mode_supported_p. 
*/ + +static bool +arm_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; +} + +/* Use the option -mvectorize-with-neon-double to override the use of quardword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +static enum machine_mode +arm_preferred_simd_mode (enum machine_mode mode) +{ + if (TARGET_NEON) + switch (mode) + { + case SFmode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; + case SImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; + case HImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; + case QImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; + case DImode: + if (!TARGET_NEON_VECTORIZE_DOUBLE) + return V2DImode; + break; + + default:; + } + + if (TARGET_REALLY_IWMMXT) + switch (mode) + { + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + + default:; + } + + return word_mode; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. + + We need to define this for LO_REGS on Thumb-1. Otherwise we can end up + using r0-r4 for function arguments, r7 for the stack frame and don't have + enough left over to do doubleword arithmetic. For Thumb-2 all the + potentially problematic instructions accept high registers so this is not + necessary. Care needs to be taken to avoid adding new Thumb-2 patterns + that require many low registers. */ +static bool +arm_class_likely_spilled_p (reg_class_t rclass) +{ + if ((TARGET_THUMB1 && rclass == LO_REGS) + || rclass == CC_REG) + return true; + + return false; +} + +/* Implements target hook small_register_classes_for_mode_p. */ +bool +arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return TARGET_THUMB1; +} + +/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal + ARM insns and therefore guarantee that the shift count is modulo 256. + DImode shifts (those implemented by lib1funcs.S or by optabs.c) + guarantee no particular behavior for out-of-range counts. */ + +static unsigned HOST_WIDE_INT +arm_shift_truncation_mask (enum machine_mode mode) +{ + return mode == SImode ? 255 : 0; +} + + +/* Map internal gcc register numbers to DWARF2 register numbers. */ + +unsigned int +arm_dbx_register_number (unsigned int regno) +{ + if (regno < 16) + return regno; + + if (IS_VFP_REGNUM (regno)) + { + /* See comment in arm_dwarf_register_span. */ + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + return 64 + regno - FIRST_VFP_REGNUM; + else + return 256 + (regno - FIRST_VFP_REGNUM) / 2; + } + + if (IS_IWMMXT_GR_REGNUM (regno)) + return 104 + regno - FIRST_IWMMXT_GR_REGNUM; + + if (IS_IWMMXT_REGNUM (regno)) + return 112 + regno - FIRST_IWMMXT_REGNUM; + + gcc_unreachable (); +} + +/* Dwarf models VFPv3 registers as 32 64-bit registers. + GCC models tham as 64 32-bit registers, so we need to describe this to + the DWARF generation code. Other registers can use the default. 
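+ For example, a DFmode value in d0 is described as a PARALLEL of its two SImode halves s0 and s1 (DWARF 64 and 65 in the legacy numbering produced by arm_dbx_register_number above), while d16 and above, which have no single-precision aliases, fall back to the 256-287 range (d16 becomes 272).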
*/ +static rtx +arm_dwarf_register_span (rtx rtl) +{ + enum machine_mode mode; + unsigned regno; + rtx parts[16]; + int nregs; + int i; + + regno = REGNO (rtl); + if (!IS_VFP_REGNUM (regno)) + return NULL_RTX; + + /* XXX FIXME: The EABI defines two VFP register ranges: + 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) + 256-287: D0-D31 + The recommended encoding for S0-S31 is a DW_OP_bit_piece of the + corresponding D register. Until GDB supports this, we shall use the + legacy encodings. We also use these encodings for D0-D15 for + compatibility with older debuggers. */ + mode = GET_MODE (rtl); + if (GET_MODE_SIZE (mode) < 8) + return NULL_RTX; + + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + { + nregs = GET_MODE_SIZE (mode) / 4; + for (i = 0; i < nregs; i += 2) + if (TARGET_BIG_END) + { + parts[i] = gen_rtx_REG (SImode, regno + i + 1); + parts[i + 1] = gen_rtx_REG (SImode, regno + i); + } + else + { + parts[i] = gen_rtx_REG (SImode, regno + i); + parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1); + } + } + else + { + nregs = GET_MODE_SIZE (mode) / 8; + for (i = 0; i < nregs; i++) + parts[i] = gen_rtx_REG (DImode, regno + i); + } + + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts)); +} + +#if ARM_UNWIND_INFO +/* Emit unwind directives for a store-multiple instruction or stack pointer + push during alignment. + These should only ever be generated by the function prologue code, so + expect them to have a particular form. + The store-multiple instruction sometimes pushes pc as the last register, + although it should not be tracked into unwind information, or for -Os + sometimes pushes some dummy registers before first register that needs + to be tracked in unwind information; such dummy registers are there just + to avoid separate stack adjustment, and will not be restored in the + epilogue. */ + +static void +arm_unwind_emit_sequence (FILE * asm_out_file, rtx p) +{ + int i; + HOST_WIDE_INT offset; + HOST_WIDE_INT nregs; + int reg_size; + unsigned reg; + unsigned lastreg; + unsigned padfirst = 0, padlast = 0; + rtx e; + + e = XVECEXP (p, 0, 0); + gcc_assert (GET_CODE (e) == SET); + + /* First insn will adjust the stack pointer. */ + gcc_assert (GET_CODE (e) == SET + && REG_P (SET_DEST (e)) + && REGNO (SET_DEST (e)) == SP_REGNUM + && GET_CODE (SET_SRC (e)) == PLUS); + + offset = -INTVAL (XEXP (SET_SRC (e), 1)); + nregs = XVECLEN (p, 0) - 1; + gcc_assert (nregs); + + reg = REGNO (SET_SRC (XVECEXP (p, 0, 1))); + if (reg < 16) + { + /* For -Os dummy registers can be pushed at the beginning to + avoid separate stack pointer adjustment. */ + e = XVECEXP (p, 0, 1); + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + padfirst = INTVAL (XEXP (e, 1)); + gcc_assert (padfirst == 0 || optimize_size); + /* The function prologue may also push pc, but not annotate it as it is + never restored. We turn this into a stack pointer adjustment. */ + e = XVECEXP (p, 0, nregs); + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + padlast = offset - INTVAL (XEXP (e, 1)) - 4; + else + padlast = offset - 4; + gcc_assert (padlast == 0 || padlast == 4); + if (padlast == 4) + fprintf (asm_out_file, "\t.pad #4\n"); + reg_size = 4; + fprintf (asm_out_file, "\t.save {"); + } + else if (IS_VFP_REGNUM (reg)) + { + reg_size = 8; + fprintf (asm_out_file, "\t.vsave {"); + } + else + /* Unknown register type. */ + gcc_unreachable (); + + /* If the stack increment doesn't match the size of the saved registers, + something has gone horribly wrong. 
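+ For instance, a prologue push {r4, r5, lr} annotated as .save {r4, r5, lr} must adjust the stack by exactly 12 bytes: three 4-byte registers with no padding on either side.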
*/ + gcc_assert (offset == padfirst + nregs * reg_size + padlast); + + offset = padfirst; + lastreg = 0; + /* The remaining insns will describe the stores. */ + for (i = 1; i <= nregs; i++) + { + /* Expect (set (mem ) (reg)). + Where is (reg:SP) or (plus (reg:SP) (const_int)). */ + e = XVECEXP (p, 0, i); + gcc_assert (GET_CODE (e) == SET + && MEM_P (SET_DEST (e)) + && REG_P (SET_SRC (e))); + + reg = REGNO (SET_SRC (e)); + gcc_assert (reg >= lastreg); + + if (i != 1) + fprintf (asm_out_file, ", "); + /* We can't use %r for vfp because we need to use the + double precision register names. */ + if (IS_VFP_REGNUM (reg)) + asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf (asm_out_file, "%r", reg); + +#ifdef ENABLE_CHECKING + /* Check that the addresses are consecutive. */ + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + gcc_assert (REG_P (XEXP (e, 0)) + && REGNO (XEXP (e, 0)) == SP_REGNUM + && CONST_INT_P (XEXP (e, 1)) + && offset == INTVAL (XEXP (e, 1))); + else + gcc_assert (i == 1 + && REG_P (e) + && REGNO (e) == SP_REGNUM); + offset += reg_size; +#endif + } + fprintf (asm_out_file, "}\n"); + if (padfirst) + fprintf (asm_out_file, "\t.pad #%d\n", padfirst); +} + +/* Emit unwind directives for a SET. */ + +static void +arm_unwind_emit_set (FILE * asm_out_file, rtx p) +{ + rtx e0; + rtx e1; + unsigned reg; + + e0 = XEXP (p, 0); + e1 = XEXP (p, 1); + switch (GET_CODE (e0)) + { + case MEM: + /* Pushing a single register. */ + if (GET_CODE (XEXP (e0, 0)) != PRE_DEC + || !REG_P (XEXP (XEXP (e0, 0), 0)) + || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM) + abort (); + + asm_fprintf (asm_out_file, "\t.save "); + if (IS_VFP_REGNUM (REGNO (e1))) + asm_fprintf(asm_out_file, "{d%d}\n", + (REGNO (e1) - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1)); + break; + + case REG: + if (REGNO (e0) == SP_REGNUM) + { + /* A stack increment. */ + if (GET_CODE (e1) != PLUS + || !REG_P (XEXP (e1, 0)) + || REGNO (XEXP (e1, 0)) != SP_REGNUM + || !CONST_INT_P (XEXP (e1, 1))) + abort (); + + asm_fprintf (asm_out_file, "\t.pad #%wd\n", + -INTVAL (XEXP (e1, 1))); + } + else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT offset; + + if (GET_CODE (e1) == PLUS) + { + if (!REG_P (XEXP (e1, 0)) + || !CONST_INT_P (XEXP (e1, 1))) + abort (); + reg = REGNO (XEXP (e1, 0)); + offset = INTVAL (XEXP (e1, 1)); + asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", + HARD_FRAME_POINTER_REGNUM, reg, + offset); + } + else if (REG_P (e1)) + { + reg = REGNO (e1); + asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", + HARD_FRAME_POINTER_REGNUM, reg); + } + else + abort (); + } + else if (REG_P (e1) && REGNO (e1) == SP_REGNUM) + { + /* Move from sp to reg. */ + asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0)); + } + else if (GET_CODE (e1) == PLUS + && REG_P (XEXP (e1, 0)) + && REGNO (XEXP (e1, 0)) == SP_REGNUM + && CONST_INT_P (XEXP (e1, 1))) + { + /* Set reg to offset from sp. */ + asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n", + REGNO (e0), (int)INTVAL(XEXP (e1, 1))); + } + else + abort (); + break; + + default: + abort (); + } +} + + +/* Emit unwind directives for the given insn. 
*/ + +static void +arm_unwind_emit (FILE * asm_out_file, rtx insn) +{ + rtx note, pat; + bool handled_one = false; + + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + return; + + if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn)) + return; + + for (note = REG_NOTES (insn); note ; note = XEXP (note, 1)) + { + switch (REG_NOTE_KIND (note)) + { + case REG_FRAME_RELATED_EXPR: + pat = XEXP (note, 0); + goto found; + + case REG_CFA_REGISTER: + pat = XEXP (note, 0); + if (pat == NULL) + { + pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + } + + /* Only emitted for IS_STACKALIGN re-alignment. */ + { + rtx dest, src; + unsigned reg; + + src = SET_SRC (pat); + dest = SET_DEST (pat); + + gcc_assert (src == stack_pointer_rtx); + reg = REGNO (dest); + asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n", + reg + 0x90, reg); + } + handled_one = true; + break; + + /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P + to get correct dwarf information for shrink-wrap. We should not + emit unwind information for it because these are used either for + pretend arguments or notes to adjust sp and restore registers from + stack. */ + case REG_CFA_DEF_CFA: + case REG_CFA_ADJUST_CFA: + case REG_CFA_RESTORE: + return; + + case REG_CFA_EXPRESSION: + case REG_CFA_OFFSET: + /* ??? Only handling here what we actually emit. */ + gcc_unreachable (); + + default: + break; + } + } + if (handled_one) + return; + pat = PATTERN (insn); + found: + + switch (GET_CODE (pat)) + { + case SET: + arm_unwind_emit_set (asm_out_file, pat); + break; + + case SEQUENCE: + /* Store multiple. */ + arm_unwind_emit_sequence (asm_out_file, pat); + break; + + default: + abort(); + } +} + + +/* Output a reference from a function exception table to the type_info + object X. The EABI specifies that the symbol should be relocated by + an R_ARM_TARGET2 relocation. */ + +static bool +arm_output_ttype (rtx x) +{ + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + /* Use special relocations for symbol references. */ + if (!CONST_INT_P (x)) + fputs ("(TARGET2)", asm_out_file); + fputc ('\n', asm_out_file); + + return TRUE; +} + +/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + +static void +arm_asm_emit_except_personality (rtx personality) +{ + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); +} + +/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ + +static void +arm_asm_init_sections (void) +{ + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); +} +#endif /* ARM_UNWIND_INFO */ + +/* Output unwind directives for the start/end of a function. */ + +void +arm_output_fn_unwind (FILE * f, bool prologue) +{ + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (prologue) + fputs ("\t.fnstart\n", f); + else + { + /* If this function will never be unwound, then mark it as such. + The came condition is used in arm_unwind_emit to suppress + the frame annotations. 
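+ For example, a nothrow function compiled without -funwind-tables is still bracketed by .fnstart/.fnend, but the .cantunwind marker emitted below records that it can never be unwound, so no unwind opcodes are needed for it.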
*/ + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + fputs("\t.cantunwind\n", f); + + fputs ("\t.fnend\n", f); + } +} + +static bool +arm_emit_tls_decoration (FILE *fp, rtx x) +{ + enum tls_reloc reloc; + rtx val; + + val = XVECEXP (x, 0, 0); + reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1)); + + output_addr_const (fp, val); + + switch (reloc) + { + case TLS_GD32: + fputs ("(tlsgd)", fp); + break; + case TLS_LDM32: + fputs ("(tlsldm)", fp); + break; + case TLS_LDO32: + fputs ("(tlsldo)", fp); + break; + case TLS_IE32: + fputs ("(gottpoff)", fp); + break; + case TLS_LE32: + fputs ("(tpoff)", fp); + break; + case TLS_DESCSEQ: + fputs ("(tlsdesc)", fp); + break; + default: + gcc_unreachable (); + } + + switch (reloc) + { + case TLS_GD32: + case TLS_LDM32: + case TLS_IE32: + case TLS_DESCSEQ: + fputs (" + (. - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 2)); + /* For DESCSEQ the 3rd operand encodes thumbness, and is added */ + fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 3)); + fputc (')', fp); + break; + default: + break; + } + + return TRUE; +} + +/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */ + +static void +arm_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + gcc_assert (size == 4); + fputs ("\t.word\t", file); + output_addr_const (file, x); + fputs ("(tlsldo)", file); +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +arm_output_addr_const_extra (FILE *fp, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return arm_emit_tls_decoration (fp, x); + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) + { + char label[256]; + int labelno = INTVAL (XVECEXP (x, 0, 0)); + + ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno); + assemble_name_raw (fp, label); + + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF) + { + assemble_name (fp, "_GLOBAL_OFFSET_TABLE_"); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == CONST_VECTOR) + return arm_emit_vector_const (fp, x); + + return FALSE; +} + +/* Output assembly for a shift instruction. + SET_FLAGS determines how the instruction modifies the condition codes. + 0 - Do not set condition codes. + 1 - Set condition codes. + 2 - Use smallest instruction. */ +const char * +arm_output_shift(rtx * operands, int set_flags) +{ + char pattern[100]; + static const char flag_chars[3] = {'?', '.', '!'}; + const char *shift; + HOST_WIDE_INT val; + char c; + + c = flag_chars[set_flags]; + if (TARGET_UNIFIED_ASM) + { + shift = shift_op(operands[3], &val); + if (shift) + { + if (val != -1) + operands[2] = GEN_INT(val); + sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1", c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c); + output_asm_insn (pattern, operands); + return ""; +} + +/* Output assembly for a WMMX immediate shift instruction. 
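+ Out-of-range counts are handled below: a count of at least the element width is clamped for wsra/wror by emitting #32 shifts (twice for DImode) and is reduced to a single wzero of the destination for the other shifts, while an in-range DImode count above 32 is split into a #32 shift followed by one for the remainder.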
*/ +const char * +arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra) +{ + int shift = INTVAL (operands[2]); + char templ[50]; + enum machine_mode opmode = GET_MODE (operands[0]); + + gcc_assert (shift >= 0); + + /* If the shift value in the register versions is > 63 (for D qualifier), + 31 (for W qualifier) or 15 (for H qualifier). */ + if (((opmode == V4HImode) && (shift > 15)) + || ((opmode == V2SImode) && (shift > 31)) + || ((opmode == DImode) && (shift > 63))) + { + if (wror_or_wsra) + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32); + output_asm_insn (templ, operands); + if (opmode == DImode) + { + sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32); + output_asm_insn (templ, operands); + } + } + else + { + /* The destination register will contain all zeros. */ + sprintf (templ, "wzero\t%%0"); + output_asm_insn (templ, operands); + } + return ""; + } + + if ((opmode == DImode) && (shift > 32)) + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32); + output_asm_insn (templ, operands); + sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32); + output_asm_insn (templ, operands); + } + else + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift); + output_asm_insn (templ, operands); + } + return ""; +} + +/* Output assembly for a WMMX tinsr instruction. */ +const char * +arm_output_iwmmxt_tinsr (rtx *operands) +{ + int mask = INTVAL (operands[3]); + int i; + char templ[50]; + int units = mode_nunits[GET_MODE (operands[0])]; + gcc_assert ((mask & (mask - 1)) == 0); + for (i = 0; i < units; ++i) + { + if ((mask & 0x01) == 1) + { + break; + } + mask >>= 1; + } + gcc_assert (i < units); + { + switch (GET_MODE (operands[0])) + { + case V8QImode: + sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i); + break; + case V4HImode: + sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i); + break; + case V2SImode: + sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i); + break; + default: + gcc_unreachable (); + break; + } + output_asm_insn (templ, operands); + } + return ""; +} + +/* Output a Thumb-1 casesi dispatch sequence. */ +const char * +thumb1_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[0])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE(diff_vec)) + { + case QImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi"); + case HImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi"); + case SImode: + return "bl\t%___gnu_thumb1_case_si"; + default: + gcc_unreachable (); + } +} + +/* Output a Thumb-2 casesi instruction. */ +const char * +thumb2_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + output_asm_insn ("cmp\t%0, %1", operands); + output_asm_insn ("bhi\t%l3", operands); + switch (GET_MODE(diff_vec)) + { + case QImode: + return "tbb\t[%|pc, %0]"; + case HImode: + return "tbh\t[%|pc, %0, lsl #1]"; + case SImode: + if (flag_pic) + { + output_asm_insn ("adr\t%4, %l2", operands); + output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands); + output_asm_insn ("add\t%4, %4, %5", operands); + return "bx\t%4"; + } + else + { + output_asm_insn ("adr\t%4, %l2", operands); + return "ldr\t%|pc, [%4, %0, lsl #2]"; + } + default: + gcc_unreachable (); + } +} + +/* Most ARM cores are single issue, but some newer ones can dual issue. 
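+ Cortex-A15 and Cortex-A57, for instance, are modelled below as issuing three instructions per cycle, and several other cores as issuing two.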
+ The scheduler descriptions rely on this being correct. */ +static int +arm_issue_rate (void) +{ + switch (arm_tune) + { + case cortexa15: + case cortexa57: + return 3; + + case cortexr4: + case cortexr4f: + case cortexr5: + case genericv7a: + case cortexa5: + case cortexa7: + case cortexa8: + case cortexa9: + case cortexa12: + case cortexa53: + case fa726te: + case marvell_pj4: + return 2; + + default: + return 1; + } +} + +/* A table and a function to perform ARM-specific name mangling for + NEON vector types in order to conform to the AAPCS (see "Procedure + Call Standard for the ARM Architecture", Appendix A). To qualify + for emission with the mangled names defined in that document, a + vector type must not only be of the correct mode but also be + composed of NEON vector element types (e.g. __builtin_neon_qi). */ +typedef struct +{ + enum machine_mode mode; + const char *element_type_name; + const char *aapcs_name; +} arm_mangle_map_entry; + +static arm_mangle_map_entry arm_mangle_map[] = { + /* 64-bit containerized types. */ + { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" }, + { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, + { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, + { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, + { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" }, + { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, + { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, + { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, + { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" }, + { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" }, + + /* 128-bit containerized types. */ + { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" }, + { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" }, + { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" }, + { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" }, + { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" }, + { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" }, + { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" }, + { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" }, + { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" }, + { VOIDmode, NULL, NULL } +}; + +const char * +arm_mangle_type (const_tree type) +{ + arm_mangle_map_entry *pos = arm_mangle_map; + + /* The ARM ABI documents (10th October 2008) say that "__va_list" + has to be managled as if it is in the "std" namespace. */ + if (TARGET_AAPCS_BASED + && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + return "St9__va_list"; + + /* Half-precision float. */ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) + return "Dh"; + + if (TREE_CODE (type) != VECTOR_TYPE) + return NULL; + + /* Check the mode of the vector type, and the name of the vector + element type, against the table. */ + while (pos->mode != VOIDmode) + { + tree elt_type = TREE_TYPE (type); + + if (pos->mode == TYPE_MODE (type) + && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL + && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), + pos->element_type_name)) + return pos->aapcs_name; + + pos++; + } + + /* Use the default mangling for unrecognized (possibly user-defined) + vector types. */ + return NULL; +} + +/* Order of allocation of core registers for Thumb: this allocation is + written over the corresponding initial entries of the array + initialized with REG_ALLOC_ORDER. We allocate all low registers + first. 
Saving and restoring a low register is usually cheaper than + using a call-clobbered high register. */ + +static const int thumb_core_reg_alloc_order[] = +{ + 3, 2, 1, 0, 4, 5, 6, 7, + 14, 12, 8, 9, 10, 11 +}; + +/* Adjust register allocation order when compiling for Thumb. */ + +void +arm_order_regs_for_local_alloc (void) +{ + const int arm_reg_alloc_order[] = REG_ALLOC_ORDER; + memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order)); + if (TARGET_THUMB) + memcpy (reg_alloc_order, thumb_core_reg_alloc_order, + sizeof (thumb_core_reg_alloc_order)); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +arm_frame_pointer_required (void) +{ + return (cfun->has_nonlocal_label + || SUBTARGET_FRAME_POINTER_REQUIRED + || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ())); +} + +/* Only thumb1 can't support conditional execution, so return true if + the target is not thumb1. */ +static bool +arm_have_conditional_execution (void) +{ + return !TARGET_THUMB1; +} + +tree +arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the + decl of the vectorized builtin for the appropriate vector mode. + NULL_TREE is returned if no such builtin is available. */ +#undef ARM_CHECK_BUILTIN_MODE +#define ARM_CHECK_BUILTIN_MODE(C) \ + (out_mode == SFmode && out_n == C \ + && in_mode == SFmode && in_n == C) + +#undef ARM_FIND_VRINT_VARIANT +#define ARM_FIND_VRINT_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ + : NULL_TREE)) + + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { + case BUILT_IN_FLOORF: + return ARM_FIND_VRINT_VARIANT (vrintm); + case BUILT_IN_CEILF: + return ARM_FIND_VRINT_VARIANT (vrintp); + case BUILT_IN_TRUNCF: + return ARM_FIND_VRINT_VARIANT (vrintz); + case BUILT_IN_ROUNDF: + return ARM_FIND_VRINT_VARIANT (vrinta); + default: + return NULL_TREE; + } + } + return NULL_TREE; +} +#undef ARM_CHECK_BUILTIN_MODE +#undef ARM_FIND_VRINT_VARIANT + +/* The AAPCS sets the maximum alignment of a vector to 64 bits. */ +static HOST_WIDE_INT +arm_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); + + if (TARGET_AAPCS_BASED) + align = MIN (align, 64); + + return align; +} + +static unsigned int +arm_autovectorize_vector_sizes (void) +{ + return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); +} + +static bool +arm_vector_alignment_reachable (const_tree type, bool is_packed) +{ + /* Vectors which aren't in packed structures will not be less aligned than + the natural alignment of their element type, so this is safe. 
*/ + if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access) + return !is_packed; + + return default_builtin_vector_alignment_reachable (type, is_packed); +} + +static bool +arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, int misalignment, + bool is_packed) +{ + if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access) + { + HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type); + + if (is_packed) + return align == 1; + + /* If the misalignment is unknown, we should be able to handle the access + so long as it is not to a member of a packed data structure. */ + if (misalignment == -1) + return true; + + /* Return true if the misalignment is a multiple of the natural alignment + of the vector's element type. This is probably always going to be + true in practice, since we've already established that this isn't a + packed access. */ + return ((misalignment % align) == 0); + } + + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +static void +arm_conditional_register_usage (void) +{ + int regno; + + if (TARGET_THUMB1 && optimize_size) + { + /* When optimizing for size on Thumb-1, it's better not + to use the HI regs, because of the overhead of + stacking them. */ + for (regno = FIRST_HI_REGNUM; + regno <= LAST_HI_REGNUM; ++regno) + fixed_regs[regno] = call_used_regs[regno] = 1; + } + + /* The link register can be clobbered by any branch insn, + but we have no way to track that at present, so mark + it as unavailable. */ + if (TARGET_THUMB1) + fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1; + + if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP) + { + /* VFPv3 registers are disabled when earlier VFP + versions are selected due to the definition of + LAST_VFP_REGNUM. */ + for (regno = FIRST_VFP_REGNUM; + regno <= LAST_VFP_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16 + || regno >= FIRST_VFP_REGNUM + 32; + } + } + + if (TARGET_REALLY_IWMMXT) + { + regno = FIRST_IWMMXT_GR_REGNUM; + /* The 2002/10/09 revision of the XScale ABI has wCG0 + and wCG1 as call-preserved registers. The 2002/11/21 + revision changed this so that all wCG registers are + scratch registers. */ + for (regno = FIRST_IWMMXT_GR_REGNUM; + regno <= LAST_IWMMXT_GR_REGNUM; ++ regno) + fixed_regs[regno] = 0; + /* The XScale ABI has wR0 - wR9 as scratch registers, + the rest as call-preserved registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10; + } + } + + if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + else if (TARGET_APCS_STACK) + { + fixed_regs[10] = 1; + call_used_regs[10] = 1; + } + /* -mcaller-super-interworking reserves r11 for calls to + _interwork_r11_call_via_rN(). Making the register global + is an easy way of ensuring that it remains valid for all + calls. 
*/ + if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING + || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) + { + fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + if (TARGET_CALLER_INTERWORKING) + global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + } + SUBTARGET_CONDITIONAL_REGISTER_USAGE +} + +static reg_class_t +arm_preferred_rename_class (reg_class_t rclass) +{ + /* Thumb-2 instructions using LO_REGS may be smaller than instructions + using GENERIC_REGS. During register rename pass, we prefer LO_REGS, + and code size can be reduced. */ + if (TARGET_THUMB2 && rclass == GENERAL_REGS) + return LO_REGS; + else + return NO_REGS; +} + +/* Compute the atrribute "length" of insn "*push_multi". + So this function MUST be kept in sync with that insn pattern. */ +int +arm_attr_length_push_multi(rtx parallel_op, rtx first_op) +{ + int i, regno, hi_reg; + int num_saves = XVECLEN (parallel_op, 0); + + /* ARM mode. */ + if (TARGET_ARM) + return 4; + /* Thumb1 mode. */ + if (TARGET_THUMB1) + return 2; + + /* Thumb2 mode. */ + regno = REGNO (first_op); + hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM); + for (i = 1; i < num_saves && !hi_reg; i++) + { + regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0)); + hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM); + } + + if (!hi_reg) + return 2; + return 4; +} + +/* Compute the number of instructions emitted by output_move_double. */ +int +arm_count_output_move_double_insns (rtx *operands) +{ + int count; + rtx ops[2]; + /* output_move_double may modify the operands array, so call it + here on a copy of the array. */ + ops[0] = operands[0]; + ops[1] = operands[1]; + output_move_double (ops, false, &count); + return count; +} + +int +vfp3_const_double_for_fract_bits (rtx operand) +{ + REAL_VALUE_TYPE r0; + + if (!CONST_DOUBLE_P (operand)) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); + if (exact_real_inverse (DFmode, &r0)) + { + if (exact_real_truncate (DFmode, &r0)) + { + HOST_WIDE_INT value = real_to_integer (&r0); + value = value & 0xffffffff; + if ((value != 0) && ( (value & (value - 1)) == 0)) + return int_log2 (value); + } + } + return 0; +} + +int +vfp3_const_double_for_bits (rtx operand) +{ + REAL_VALUE_TYPE r0; + + if (!CONST_DOUBLE_P (operand)) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); + if (exact_real_truncate (DFmode, &r0)) + { + HOST_WIDE_INT value = real_to_integer (&r0); + value = value & 0xffffffff; + if ((value != 0) && ( (value & (value - 1)) == 0)) + return int_log2 (value); + } + + return 0; +} + +/* Emit a memory barrier around an atomic sequence according to MODEL. */ + +static void +arm_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +arm_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* Emit the load-exclusive and store-exclusive instructions. + Use acquire and release versions if necessary. 
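+ For an SImode access this chooses between the plain exclusive patterns (ldrex/strex) and, when the caller requests it (which only happens on TARGET_HAVE_LDACQ targets), the acquire/release exclusive forms, i.e. the LDAEX/STLEX class of instructions; the other access sizes follow the same scheme.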
*/ + +static void +arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq) +{ + rtx (*gen) (rtx, rtx); + + if (acq) + { + switch (mode) + { + case QImode: gen = gen_arm_load_acquire_exclusiveqi; break; + case HImode: gen = gen_arm_load_acquire_exclusivehi; break; + case SImode: gen = gen_arm_load_acquire_exclusivesi; break; + case DImode: gen = gen_arm_load_acquire_exclusivedi; break; + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case QImode: gen = gen_arm_load_exclusiveqi; break; + case HImode: gen = gen_arm_load_exclusivehi; break; + case SImode: gen = gen_arm_load_exclusivesi; break; + case DImode: gen = gen_arm_load_exclusivedi; break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen (rval, mem)); +} + +static void +arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, + rtx mem, bool rel) +{ + rtx (*gen) (rtx, rtx, rtx); + + if (rel) + { + switch (mode) + { + case QImode: gen = gen_arm_store_release_exclusiveqi; break; + case HImode: gen = gen_arm_store_release_exclusivehi; break; + case SImode: gen = gen_arm_store_release_exclusivesi; break; + case DImode: gen = gen_arm_store_release_exclusivedi; break; + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case QImode: gen = gen_arm_store_exclusiveqi; break; + case HImode: gen = gen_arm_store_exclusivehi; break; + case SImode: gen = gen_arm_store_exclusivesi; break; + case DImode: gen = gen_arm_store_exclusivedi; break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen (bval, rval, mem)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +emit_unlikely_jump (rtx insn) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + + insn = emit_jump_insn (insn); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. */ + +void +arm_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + enum machine_mode mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + /* Normally the succ memory model must be stronger than fail, but in the + unlikely event of fail being ACQUIRE and succ being RELEASE we need to + promote succ to ACQ_REL so that we don't lose the acquire semantics. */ + + if (TARGET_HAVE_LDACQ + && INTVAL (mod_f) == MEMMODEL_ACQUIRE + && INTVAL (mod_s) == MEMMODEL_RELEASE) + mod_s = GEN_INT (MEMMODEL_ACQ_REL); + + switch (mode) + { + case QImode: + case HImode: + /* For narrow modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* FALLTHRU */ + + case SImode: + /* Force the value into a register if needed. We waited until after + the zero-extension above to do this properly. 
*/ + if (!arm_add_operand (oldval, SImode)) + oldval = force_reg (SImode, oldval); + break; + + case DImode: + if (!cmpdi_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); + + /* In all cases, we arrange for success to be signaled by Z set. + This arrangement allows for the boolean result to be used directly + in a subsequent branch, post optimization. */ + x = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); +} + +/* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether + another memory store between the load-exclusive and store-exclusive can + reset the monitor from Exclusive to Open state. This means we must wait + until after reload to split the pattern, lest we get a register spill in + the middle of the atomic sequence. */ + +void +arm_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval, scratch; + enum machine_mode mode; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[5]); + mod_f = (enum memmodel) INTVAL (operands[6]); + scratch = operands[7]; + mode = GET_MODE (mem); + + bool use_acquire = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_RELEASE); + + bool use_release = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + arm_emit_load_exclusive (mode, rval, mem, use_acquire); + + cond = arm_gen_compare_reg (NE, rval, oldval, scratch); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + arm_emit_store_exclusive (mode, scratch, mem, newval, use_release); + + /* Weak or strong, we want EQ to be true for success, so that we + match the flags that we got from the compare above. */ + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + /* Checks whether a barrier is needed and emits one accordingly. 
*/ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +void +arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? DImode : SImode); + rtx label, x; + + bool use_acquire = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE); + + bool use_release = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + arm_emit_load_exclusive (mode, old_out, mem, use_acquire); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* FALLTHRU */ + + case PLUS: + if (mode == DImode) + { + /* DImode plus/minus need to clobber flags. */ + /* The adddi3 and subdi3 patterns are incorrectly written so that + they require matching operands, even when we could easily support + three operands. Thankfully, this can be fixed up post-splitting, + as the individual add+adc patterns do accept three operands and + post-reload cprop can make these moves go away. */ + emit_move_insn (new_out, old_out); + if (code == PLUS) + x = gen_adddi3 (new_out, new_out, value); + else + x = gen_subdi3 (new_out, new_out, value); + emit_insn (x); + break; + } + /* FALLTHRU */ + + default: + x = gen_rtx_fmt_ee (code, wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + } + + arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out), + use_release); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (model); +} + +#define MAX_VECT_LEN 16 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Generate a variable permutation. 
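+ For a single V8QImode input this is one vtbl with a single D-register table; with two distinct inputs the operands are first combined with vcombine so that the two-register table form (the vtbl2 pattern) can index across both vectors.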
*/ + +static void +arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + bool one_vector_p = rtx_equal_p (op0, op1); + + gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); + gcc_checking_assert (GET_MODE (op0) == vmode); + gcc_checking_assert (GET_MODE (op1) == vmode); + gcc_checking_assert (GET_MODE (sel) == vmode); + gcc_checking_assert (TARGET_NEON); + + if (one_vector_p) + { + if (vmode == V8QImode) + emit_insn (gen_neon_vtbl1v8qi (target, op0, sel)); + else + emit_insn (gen_neon_vtbl1v16qi (target, op0, sel)); + } + else + { + rtx pair; + + if (vmode == V8QImode) + { + pair = gen_reg_rtx (V16QImode); + emit_insn (gen_neon_vcombinev8qi (pair, op0, op1)); + pair = gen_lowpart (TImode, pair); + emit_insn (gen_neon_vtbl2v8qi (target, pair, sel)); + } + else + { + pair = gen_reg_rtx (OImode); + emit_insn (gen_neon_vcombinev16qi (pair, op0, op1)); + emit_insn (gen_neon_vtbl2v16qi (target, pair, sel)); + } + } +} + +void +arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool one_vector_p = rtx_equal_p (op0, op1); + rtx rmask[MAX_VECT_LEN], mask; + + /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + gcc_checking_assert (!BYTES_BIG_ENDIAN); + + /* The VTBL instruction does not use a modulo index, so we must take care + of that ourselves. */ + mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); + for (i = 0; i < nelt; ++i) + rmask[i] = mask; + mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); + sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); + + arm_expand_vec_perm_1 (target, op0, op1, sel); +} + +/* Generate or test for an insn that supports a constant permutation. */ + +/* Recognize patterns for the VUZP insns. */ + +static bool +arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i++) + { + unsigned elt = (i * 2 + odd) & mask; + if (d->perm[i] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vuzpv16qi_internal; break; + case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; + case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; + case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; + case V4SImode: gen = gen_neon_vuzpv4si_internal; break; + case V2SImode: gen = gen_neon_vuzpv2si_internal; break; + case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; + case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (odd) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VZIP insns. 
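+ For V8QImode with two input vectors the low-half interleave corresponds to the selector { 0, 8, 1, 9, 2, 10, 3, 11 } and the high-half interleave to { 4, 12, 5, 13, 6, 14, 7, 15 }, which is the shape the checks below look for.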
*/ + +static bool +arm_evpc_neon_vzip (struct expand_vec_perm_d *d) +{ + unsigned int i, high, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + high = nelt / 2; + if (d->perm[0] == high) + ; + else if (d->perm[0] == 0) + high = 0; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt / 2; i++) + { + unsigned elt = (i + high) & mask; + if (d->perm[i * 2] != elt) + return false; + elt = (elt + nelt) & mask; + if (d->perm[i * 2 + 1] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vzipv16qi_internal; break; + case V8QImode: gen = gen_neon_vzipv8qi_internal; break; + case V8HImode: gen = gen_neon_vzipv8hi_internal; break; + case V4HImode: gen = gen_neon_vzipv4hi_internal; break; + case V4SImode: gen = gen_neon_vzipv4si_internal; break; + case V2SImode: gen = gen_neon_vzipv2si_internal; break; + case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; + case V4SFmode: gen = gen_neon_vzipv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + high = !high; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (high) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VREV insns. */ + +static bool +arm_evpc_neon_vrev (struct expand_vec_perm_d *d) +{ + unsigned int i, j, diff, nelt = d->nelt; + rtx (*gen)(rtx, rtx, rtx); + + if (!d->one_vector_p) + return false; + + diff = d->perm[0]; + switch (diff) + { + case 7: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev64v16qi; break; + case V8QImode: gen = gen_neon_vrev64v8qi; break; + default: + return false; + } + break; + case 3: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev32v16qi; break; + case V8QImode: gen = gen_neon_vrev32v8qi; break; + case V8HImode: gen = gen_neon_vrev64v8hi; break; + case V4HImode: gen = gen_neon_vrev64v4hi; break; + default: + return false; + } + break; + case 1: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev16v16qi; break; + case V8QImode: gen = gen_neon_vrev16v8qi; break; + case V8HImode: gen = gen_neon_vrev32v8hi; break; + case V4HImode: gen = gen_neon_vrev32v4hi; break; + case V4SImode: gen = gen_neon_vrev64v4si; break; + case V2SImode: gen = gen_neon_vrev64v2si; break; + case V4SFmode: gen = gen_neon_vrev64v4sf; break; + case V2SFmode: gen = gen_neon_vrev64v2sf; break; + default: + return false; + } + break; + default: + return false; + } + + for (i = 0; i < nelt ; i += diff + 1) + for (j = 0; j <= diff; j += 1) + { + /* This is guaranteed to be true as the value of diff + is 7, 3, 1 and we should have enough elements in the + queue to generate this. Getting a vector mask with a + value of diff other than these values implies that + something is wrong by the time we get here. */ + gcc_assert (i + j < nelt); + if (d->perm[i + j] != i + diff - j) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + /* ??? The third operand is an artifact of the builtin infrastructure + and is ignored by the actual instruction. */ + emit_insn (gen (d->target, d->op0, const0_rtx)); + return true; +} + +/* Recognize patterns for the VTRN insns. 
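+ For V8QImode with two input vectors the even-element transpose corresponds to the selector { 0, 8, 2, 10, 4, 12, 6, 14 } and the odd-element transpose to { 1, 9, 3, 11, 5, 13, 7, 15 }.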
*/ + +static bool +arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i += 2) + { + if (d->perm[i] != i + odd) + return false; + if (d->perm[i + 1] != ((i + nelt + odd) & mask)) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vtrnv16qi_internal; break; + case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; + case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; + case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; + case V4SImode: gen = gen_neon_vtrnv4si_internal; break; + case V2SImode: gen = gen_neon_vtrnv2si_internal; break; + case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; + case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (odd) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VEXT insns. */ + +static bool +arm_evpc_neon_vext (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx offset; + + unsigned int location; + + unsigned int next = d->perm[0] + 1; + + /* TODO: Handle GCC's numbering of elements for big-endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + /* Check if the extracted indexes are increasing by one. */ + for (i = 1; i < nelt; next++, i++) + { + /* If we hit the most significant element of the 2nd vector in + the previous iteration, no need to test further. */ + if (next == 2 * nelt) + return false; + + /* If we are operating on only one vector: it could be a + rotation. If there are only two elements of size < 64, let + arm_evpc_neon_vrev catch it. */ + if (d->one_vector_p && (next == nelt)) + { + if ((nelt == 2) && (d->vmode != V2DImode)) + return false; + else + next = 0; + } + + if (d->perm[i] != next) + return false; + } + + location = d->perm[0]; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vextv16qi; break; + case V8QImode: gen = gen_neon_vextv8qi; break; + case V4HImode: gen = gen_neon_vextv4hi; break; + case V8HImode: gen = gen_neon_vextv8hi; break; + case V2SImode: gen = gen_neon_vextv2si; break; + case V4SImode: gen = gen_neon_vextv4si; break; + case V2SFmode: gen = gen_neon_vextv2sf; break; + case V4SFmode: gen = gen_neon_vextv4sf; break; + case V2DImode: gen = gen_neon_vextv2di; break; + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + offset = GEN_INT (location); + emit_insn (gen (d->target, d->op0, d->op1, offset)); + return true; +} + +/* The NEON VTBL instruction is a fully variable permuation that's even + stronger than what we expose via VEC_PERM_EXPR. What it doesn't do + is mask the index operand as VEC_PERM_EXPR requires. Therefore we + can do slightly better by expanding this as a constant where we don't + have to apply a mask. 
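+
+   Concretely: VEC_PERM_EXPR takes each selector element modulo the number
+   of input elements, which is why arm_expand_vec_perm above must AND a
+   variable selector with nelt - 1 or 2 * nelt - 1 at run time, whereas
+   VTBL simply writes zero for any out-of-range index.  For a constant
+   selector the indices have already been reduced by
+   arm_expand_vec_perm_const, so that run-time AND can be omitted here.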
*/ + +static bool +arm_evpc_neon_vtbl (struct expand_vec_perm_d *d) +{ + rtx rperm[MAX_VECT_LEN], sel; + enum machine_mode vmode = d->vmode; + unsigned int i, nelt = d->nelt; + + /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + if (BYTES_BIG_ENDIAN) + return false; + + if (d->testing_p) + return true; + + /* Generic code will try constant permutation twice. Once with the + original mode and again with the elements lowered to QImode. + So wait and don't do the selector expansion ourselves. */ + if (vmode != V8QImode && vmode != V16QImode) + return false; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (d->perm[i]); + sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + sel = force_reg (vmode, sel); + + arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); + return true; +} + +static bool +arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + /* Check if the input mask matches vext before reordering the + operands. */ + if (TARGET_NEON) + if (arm_evpc_neon_vext (d)) + return true; + + /* The pattern matching functions above are written to look for a small + number to begin the sequence (0, 1, N/2). If we begin with an index + from the second operand, we can swap the operands. */ + if (d->perm[0] >= d->nelt) + { + unsigned i, nelt = d->nelt; + rtx x; + + for (i = 0; i < nelt; ++i) + d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + + x = d->op0; + d->op0 = d->op1; + d->op1 = x; + } + + if (TARGET_NEON) + { + if (arm_evpc_neon_vuzp (d)) + return true; + if (arm_evpc_neon_vzip (d)) + return true; + if (arm_evpc_neon_vrev (d)) + return true; + if (arm_evpc_neon_vtrn (d)) + return true; + return arm_evpc_neon_vtbl (d); + } + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + + d.vmode = GET_MODE (target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (op0, op1)) + break; + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + /* FALLTHRU */ + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = op0; + d.one_vector_p = true; + break; + } + + return arm_expand_vec_perm_const_1 (&d); +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ + +static bool +arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Categorize the set of elements in the selector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* For all elements from second vector, fold the elements to first. 
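+
+     For example, which == 2 means every index selects from the second
+     operand: for V8QImode a selector of { 8, 9, 10, 11, 12, 13, 14, 15 }
+     is rewritten below as { 0, 1, 2, 3, 4, 5, 6, 7 } applied to a single
+     input, so the one-vector recognizers can match it.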
*/ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to the vector type. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = arm_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +bool +arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) +{ + /* If we are soft float and we do not have ldrd + then all auto increment forms are ok. */ + if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4)) + return true; + + switch (code) + { + /* Post increment and Pre Decrement are supported for all + instruction forms except for vector forms. */ + case ARM_POST_INC: + case ARM_PRE_DEC: + if (VECTOR_MODE_P (mode)) + { + if (code != ARM_PRE_DEC) + return true; + else + return false; + } + + return true; + + case ARM_POST_DEC: + case ARM_PRE_INC: + /* Without LDRD and mode size greater than + word size, there is no point in auto-incrementing + because ldm and stm will not have these forms. */ + if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4) + return false; + + /* Vector and floating point modes do not support + these auto increment forms. */ + if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode)) + return false; + + return true; + + default: + return false; + + } + + return false; +} + +/* The default expansion of general 64-bit shifts in core-regs is suboptimal, + on ARM, since we know that shifts by negative amounts are no-ops. + Additionally, the default expansion code is not available or suitable + for post-reload insn splits (this can occur when the register allocator + chooses not to do a shift in NEON). + + This function is used in both initial expand and post-reload splits, and + handles all kinds of 64-bit shifts. + + Input requirements: + - It is safe for the input and output to be the same register, but + early-clobber rules apply for the shift amount and scratch registers. + - Shift by register requires both scratch registers. In all other cases + the scratch registers may be NULL. + - Ashiftrt by a register also clobbers the CC register. */ +void +arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in, + rtx amount, rtx scratch1, rtx scratch2) +{ + rtx out_high = gen_highpart (SImode, out); + rtx out_low = gen_lowpart (SImode, out); + rtx in_high = gen_highpart (SImode, in); + rtx in_low = gen_lowpart (SImode, in); + + /* Terminology: + in = the register pair containing the input value. + out = the destination register pair. + up = the high- or low-part of each pair. + down = the opposite part to "up". + In a shift, we can consider bits to shift from "up"-stream to + "down"-stream, so in a left-shift "up" is the low-part and "down" + is the high-part of each register pair. */ + + rtx out_up = code == ASHIFT ? out_low : out_high; + rtx out_down = code == ASHIFT ? out_high : out_low; + rtx in_up = code == ASHIFT ? in_low : in_high; + rtx in_down = code == ASHIFT ? 
in_high : in_low; + + gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT); + gcc_assert (out + && (REG_P (out) || GET_CODE (out) == SUBREG) + && GET_MODE (out) == DImode); + gcc_assert (in + && (REG_P (in) || GET_CODE (in) == SUBREG) + && GET_MODE (in) == DImode); + gcc_assert (amount + && (((REG_P (amount) || GET_CODE (amount) == SUBREG) + && GET_MODE (amount) == SImode) + || CONST_INT_P (amount))); + gcc_assert (scratch1 == NULL + || (GET_CODE (scratch1) == SCRATCH) + || (GET_MODE (scratch1) == SImode + && REG_P (scratch1))); + gcc_assert (scratch2 == NULL + || (GET_CODE (scratch2) == SCRATCH) + || (GET_MODE (scratch2) == SImode + && REG_P (scratch2))); + gcc_assert (!REG_P (out) || !REG_P (amount) + || !HARD_REGISTER_P (out) + || (REGNO (out) != REGNO (amount) + && REGNO (out) + 1 != REGNO (amount))); + + /* Macros to make following code more readable. */ + #define SUB_32(DEST,SRC) \ + gen_addsi3 ((DEST), (SRC), GEN_INT (-32)) + #define RSB_32(DEST,SRC) \ + gen_subsi3 ((DEST), GEN_INT (32), (SRC)) + #define SUB_S_32(DEST,SRC) \ + gen_addsi3_compare0 ((DEST), (SRC), \ + GEN_INT (-32)) + #define SET(DEST,SRC) \ + gen_rtx_SET (SImode, (DEST), (SRC)) + #define SHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT)) + #define LSHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \ + SImode, (SRC), (AMOUNT)) + #define REV_LSHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \ + SImode, (SRC), (AMOUNT)) + #define ORR(A,B) \ + gen_rtx_IOR (SImode, (A), (B)) + #define BRANCH(COND,LABEL) \ + gen_arm_cond_branch ((LABEL), \ + gen_rtx_ ## COND (CCmode, cc_reg, \ + const0_rtx), \ + cc_reg) + + /* Shifts by register and shifts by constant are handled separately. */ + if (CONST_INT_P (amount)) + { + /* We have a shift-by-constant. */ + + /* First, handle out-of-range shift amounts. + In both cases we try to match the result an ARM instruction in a + shift-by-register would give. This helps reduce execution + differences between optimization levels, but it won't stop other + parts of the compiler doing different things. This is "undefined + behaviour, in any case. */ + if (INTVAL (amount) <= 0) + emit_insn (gen_movdi (out, in)); + else if (INTVAL (amount) >= 64) + { + if (code == ASHIFTRT) + { + rtx const31_rtx = GEN_INT (31); + emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx))); + emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx))); + } + else + emit_insn (gen_movdi (out, const0_rtx)); + } + + /* Now handle valid shifts. */ + else if (INTVAL (amount) < 32) + { + /* Shifts by a constant less than 32. */ + rtx reverse_amount = GEN_INT (32 - INTVAL (amount)); + + emit_insn (SET (out_down, LSHIFT (code, in_down, amount))); + emit_insn (SET (out_down, + ORR (REV_LSHIFT (code, in_up, reverse_amount), + out_down))); + emit_insn (SET (out_up, SHIFT (code, in_up, amount))); + } + else + { + /* Shifts by a constant greater than 31. */ + rtx adj_amount = GEN_INT (INTVAL (amount) - 32); + + emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount))); + if (code == ASHIFTRT) + emit_insn (gen_ashrsi3 (out_up, in_up, + GEN_INT (31))); + else + emit_insn (SET (out_up, const0_rtx)); + } + } + else + { + /* We have a shift-by-register. */ + rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + + /* This alternative requires the scratch registers. */ + gcc_assert (scratch1 && REG_P (scratch1)); + gcc_assert (scratch2 && REG_P (scratch2)); + + /* We will need the values "amount-32" and "32-amount" later. 
+ Swapping them around now allows the later code to be more general. */ + switch (code) + { + case ASHIFT: + emit_insn (SUB_32 (scratch1, amount)); + emit_insn (RSB_32 (scratch2, amount)); + break; + case ASHIFTRT: + emit_insn (RSB_32 (scratch1, amount)); + /* Also set CC = amount > 32. */ + emit_insn (SUB_S_32 (scratch2, amount)); + break; + case LSHIFTRT: + emit_insn (RSB_32 (scratch1, amount)); + emit_insn (SUB_32 (scratch2, amount)); + break; + default: + gcc_unreachable (); + } + + /* Emit code like this: + + arithmetic-left: + out_down = in_down << amount; + out_down = (in_up << (amount - 32)) | out_down; + out_down = ((unsigned)in_up >> (32 - amount)) | out_down; + out_up = in_up << amount; + + arithmetic-right: + out_down = in_down >> amount; + out_down = (in_up << (32 - amount)) | out_down; + if (amount < 32) + out_down = ((signed)in_up >> (amount - 32)) | out_down; + out_up = in_up << amount; + + logical-right: + out_down = in_down >> amount; + out_down = (in_up << (32 - amount)) | out_down; + if (amount < 32) + out_down = ((unsigned)in_up >> (amount - 32)) | out_down; + out_up = in_up << amount; + + The ARM and Thumb2 variants are the same but implemented slightly + differently. If this were only called during expand we could just + use the Thumb2 case and let combine do the right thing, but this + can also be called from post-reload splitters. */ + + emit_insn (SET (out_down, LSHIFT (code, in_down, amount))); + + if (!TARGET_THUMB2) + { + /* Emit code for ARM mode. */ + emit_insn (SET (out_down, + ORR (SHIFT (ASHIFT, in_up, scratch1), out_down))); + if (code == ASHIFTRT) + { + rtx done_label = gen_label_rtx (); + emit_jump_insn (BRANCH (LT, done_label)); + emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2), + out_down))); + emit_label (done_label); + } + else + emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2), + out_down))); + } + else + { + /* Emit code for Thumb2 mode. + Thumb2 can't do shift and or in one insn. */ + emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1))); + emit_insn (gen_iorsi3 (out_down, out_down, scratch1)); + + if (code == ASHIFTRT) + { + rtx done_label = gen_label_rtx (); + emit_jump_insn (BRANCH (LT, done_label)); + emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2))); + emit_insn (SET (out_down, ORR (out_down, scratch2))); + emit_label (done_label); + } + else + { + emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2))); + emit_insn (gen_iorsi3 (out_down, out_down, scratch2)); + } + } + + emit_insn (SET (out_up, SHIFT (code, in_up, amount))); + } + + #undef SUB_32 + #undef RSB_32 + #undef SUB_S_32 + #undef SET + #undef SHIFT + #undef LSHIFT + #undef REV_LSHIFT + #undef ORR + #undef BRANCH +} + + +/* Returns true if a valid comparison operation and makes + the operands in a form that is valid. */ +bool +arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) +{ + enum rtx_code code = GET_CODE (*comparison); + int code_int; + enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + ? 
GET_MODE (*op2) : GET_MODE (*op1); + + gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); + + if (code == UNEQ || code == LTGT) + return false; + + code_int = (int)code; + arm_canonicalize_comparison (&code_int, op1, op2, 0); + PUT_CODE (*comparison, (enum rtx_code)code_int); + + switch (mode) + { + case SImode: + if (!arm_add_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!arm_add_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + + case DImode: + if (!cmpdi_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!cmpdi_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + + case SFmode: + case DFmode: + if (!arm_float_compare_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!arm_float_compare_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + default: + break; + } + + return false; + +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +static unsigned HOST_WIDE_INT +arm_asan_shadow_offset (void) +{ + return (unsigned HOST_WIDE_INT) 1 << 29; +} + +#include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h new file mode 100644 index 000000000..7ca47a7ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -0,0 +1,2398 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_ARM_H +#define GCC_ARM_H + +/* We can't use enum machine_mode inside a generator file because it + hasn't been created yet; we shouldn't be using any code that + needs the real definition though, so this ought to be safe. */ +#ifdef GENERATOR_FILE +#define MACHMODE int +#else +#include "insn-modes.h" +#define MACHMODE enum machine_mode +#endif + +#include "config/vxworks-dummy.h" + +/* The architecture define. */ +extern char arm_arch_name[]; + +/* Target CPU builtins. 
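+
+   For example (assuming an AAPCS-based target such as arm-linux-gnueabihf),
+   compiling with -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=hard would define,
+   among others, __ARM_ARCH to 7, __ARM_FEATURE_DSP, __ARM_NEON__ and
+   __ARM_NEON, and __ARM_PCS_VFP.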
*/ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ + if (TARGET_ARM_QBIT) \ + builtin_define ("__ARM_FEATURE_QBIT"); \ + if (TARGET_ARM_SAT) \ + builtin_define ("__ARM_FEATURE_SAT"); \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ + if (unaligned_access) \ + builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + if (TARGET_CRC32) \ + builtin_define ("__ARM_FEATURE_CRC32"); \ + if (TARGET_32BIT) \ + builtin_define ("__ARM_32BIT_STATE"); \ + if (TARGET_ARM_FEATURE_LDREX) \ + builtin_define_with_int_value ( \ + "__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); \ + if ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) \ + || TARGET_ARM_ARCH_ISA_THUMB >=2) \ + builtin_define ("__ARM_FEATURE_CLZ"); \ + if (TARGET_INT_SIMD) \ + builtin_define ("__ARM_FEATURE_SIMD32"); \ + \ + builtin_define_with_int_value ( \ + "__ARM_SIZEOF_MINIMAL_ENUM", \ + flag_short_enums ? 1 : 4); \ + builtin_define_with_int_value ( \ + "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + if (TARGET_ARM_ARCH_PROFILE) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ + \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. */ \ + builtin_define ("__arm__"); \ + if (TARGET_ARM_ARCH) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH", TARGET_ARM_ARCH); \ + if (arm_arch_notm) \ + builtin_define ("__ARM_ARCH_ISA_ARM"); \ + builtin_define ("__APCS_32__"); \ + if (TARGET_THUMB) \ + builtin_define ("__thumb__"); \ + if (TARGET_THUMB2) \ + builtin_define ("__thumb2__"); \ + if (TARGET_ARM_ARCH_ISA_THUMB) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH_ISA_THUMB", \ + TARGET_ARM_ARCH_ISA_THUMB); \ + \ + if (TARGET_BIG_END) \ + { \ + builtin_define ("__ARMEB__"); \ + builtin_define ("__ARM_BIG_ENDIAN"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEB__"); \ + if (TARGET_LITTLE_WORDS) \ + builtin_define ("__ARMWEL__"); \ + } \ + else \ + { \ + builtin_define ("__ARMEL__"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEL__"); \ + } \ + \ + if (TARGET_SOFT_FLOAT) \ + builtin_define ("__SOFTFP__"); \ + \ + if (TARGET_VFP) \ + builtin_define ("__VFP_FP__"); \ + \ + if (TARGET_ARM_FP) \ + builtin_define_with_int_value ( \ + "__ARM_FP", TARGET_ARM_FP); \ + if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) \ + builtin_define ("__ARM_FP16_FORMAT_IEEE"); \ + if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) \ + builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); \ + if (TARGET_FMA) \ + builtin_define ("__ARM_FEATURE_FMA"); \ + \ + if (TARGET_NEON) \ + { \ + builtin_define ("__ARM_NEON__"); \ + builtin_define ("__ARM_NEON"); \ + } \ + if (TARGET_NEON_FP) \ + builtin_define_with_int_value ( \ + "__ARM_NEON_FP", TARGET_NEON_FP); \ + \ + /* Add a define for interworking. \ + Needed when building libgcc.a. 
*/ \ + if (arm_cpp_interwork) \ + builtin_define ("__THUMB_INTERWORK__"); \ + \ + builtin_assert ("cpu=arm"); \ + builtin_assert ("machine=arm"); \ + \ + builtin_define (arm_arch_name); \ + if (arm_arch_xscale) \ + builtin_define ("__XSCALE__"); \ + if (arm_arch_iwmmxt) \ + { \ + builtin_define ("__IWMMXT__"); \ + builtin_define ("__ARM_WMMX"); \ + } \ + if (arm_arch_iwmmxt2) \ + builtin_define ("__IWMMXT2__"); \ + if (TARGET_AAPCS_BASED) \ + { \ + if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \ + builtin_define ("__ARM_PCS_VFP"); \ + else if (arm_pcs_default == ARM_PCS_AAPCS) \ + builtin_define ("__ARM_PCS"); \ + builtin_define ("__ARM_EABI__"); \ + } \ + if (TARGET_IDIV) \ + builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ + } while (0) + +#include "config/arm/arm-opts.h" + +enum target_cpus +{ +#define ARM_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + TARGET_CPU_##INTERNAL_IDENT, +#include "arm-cores.def" +#undef ARM_CORE + TARGET_CPU_generic +}; + +/* The processor for which instructions should be scheduled. */ +extern enum processor_type arm_tune; + +typedef enum arm_cond_code +{ + ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC, + ARM_HI, ARM_LS, ARM_GE, ARM_LT, ARM_GT, ARM_LE, ARM_AL, ARM_NV +} +arm_cc; + +extern arm_cc arm_current_cc; + +#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) + +/* The maximum number of instructions that is beneficial to + conditionally execute. */ +#undef MAX_CONDITIONAL_EXECUTE +#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute () + +extern int arm_target_label; +extern int arm_ccfsm_state; +extern GTY(()) rtx arm_target_insn; +/* The label of the current constant pool. */ +extern rtx pool_vector_label; +/* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ +extern int return_used_this_function; +/* Callback to output language specific object attributes. */ +extern void (*arm_lang_output_object_attributes_hook)(void); + +/* Just in case configure has failed to define anything. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT TARGET_CPU_generic +#endif + + +#undef CPP_SPEC +#define CPP_SPEC "%(subtarget_cpp_spec) \ +%{mfloat-abi=soft:%{mfloat-abi=hard: \ + %e-mfloat-abi=soft and -mfloat-abi=hard may not be used together}} \ +%{mbig-endian:%{mlittle-endian: \ + %e-mbig-endian and -mlittle-endian may not be used together}}" + +#ifndef CC1_SPEC +#define CC1_SPEC "" +#endif + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. */ +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + { "asm_cpu_spec", ASM_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +/* Run-time Target Specification. */ +#define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point instructions. */ +#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point calling convention. 
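+
+   Note the distinction from TARGET_HARD_FLOAT above: -mfloat-abi=softfp
+   selects VFP instructions with the soft-float calling convention
+   (TARGET_HARD_FLOAT but not TARGET_HARD_FLOAT_ABI), while -mfloat-abi=hard
+   additionally passes floating-point arguments and results in VFP
+   registers.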
*/ +#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) +#define TARGET_IWMMXT (arm_arch_iwmmxt) +#define TARGET_IWMMXT2 (arm_arch_iwmmxt2) +#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) +#define TARGET_REALLY_IWMMXT2 (TARGET_IWMMXT2 && TARGET_32BIT) +#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) +#define TARGET_ARM (! TARGET_THUMB) +#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */ +#define TARGET_BACKTRACE (leaf_function_p () \ + ? TARGET_TPCS_LEAF_FRAME \ + : TARGET_TPCS_FRAME) +#define TARGET_AAPCS_BASED \ + (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS) + +#define TARGET_HARD_TP (target_thread_pointer == TP_CP15) +#define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT) +#define TARGET_GNU2_TLS (target_tls_dialect == TLS_GNU2) + +/* Only 16-bit thumb code. */ +#define TARGET_THUMB1 (TARGET_THUMB && !arm_arch_thumb2) +/* Arm or Thumb-2 32-bit code. */ +#define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2) +/* 32-bit Thumb-2 code. */ +#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) +/* Thumb-1 only. */ +#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) + +#define TARGET_LDRD (arm_arch5e && ARM_DOUBLEWORD_ALIGN \ + && !TARGET_THUMB1) + +#define TARGET_CRC32 (arm_arch_crc) + +/* The following two macros concern the ability to execute coprocessor + instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently + only ever tested when we know we are generating for VFP hardware; we need + to be more careful with TARGET_NEON as noted below. */ + +/* FPU is has the full VFPv3/NEON register file of 32 D registers. */ +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) + +/* FPU supports VFPv3 instructions. */ +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) + +/* FPU only supports VFP single-precision instructions. */ +#define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE) + +/* FPU supports VFP double-precision instructions. */ +#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_fpu_desc->regs != VFP_REG_SINGLE) + +/* FPU supports half-precision floating-point with NEON element load/store. */ +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) + +/* FPU supports VFP half-precision floating-point. */ +#define TARGET_FP16 (TARGET_VFP && arm_fpu_desc->fp16) + +/* FPU supports fused-multiply-add operations. */ +#define TARGET_FMA (TARGET_VFP && arm_fpu_desc->rev >= 4) + +/* FPU is ARMv8 compatible. */ +#define TARGET_FPU_ARMV8 (TARGET_VFP && arm_fpu_desc->rev >= 8) + +/* FPU supports Crypto extensions. */ +#define TARGET_CRYPTO (TARGET_VFP && arm_fpu_desc->crypto) + +/* FPU supports Neon instructions. The setting of this macro gets + revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT + and TARGET_HARD_FLOAT to ensure that NEON instructions are + available. */ +#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ + && TARGET_VFP && arm_fpu_desc->neon) + +/* Q-bit is present. */ +#define TARGET_ARM_QBIT \ + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7)) +/* Saturation operation, e.g. SSAT. */ +#define TARGET_ARM_SAT \ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7)) +/* "DSP" multiply instructions, eg. SMULxy. */ +#define TARGET_DSP_MULTIPLY \ + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em)) +/* Integer SIMD instructions, and extend-accumulate instructions. 
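+
+   That is, the 32-bit SIMD operations introduced in ARMv6 (for example
+   UADD8 and USUB16) together with the extend-and-accumulate forms such as
+   SXTAB and UXTAH; this is also the condition under which
+   __ARM_FEATURE_SIMD32 is defined above.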
*/ +#define TARGET_INT_SIMD \ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + +/* Should MOVW/MOVT be used in preference to a constant pool. */ +#define TARGET_USE_MOVT \ + (arm_arch_thumb2 \ + && (arm_disable_literal_pool \ + || (!optimize_size && !current_tune->prefer_constant_pool))) + +/* We could use unified syntax for arm mode, but for now we just use it + for Thumb-2. */ +#define TARGET_UNIFIED_ASM TARGET_THUMB2 + +/* Nonzero if this chip provides the DMB instruction. */ +#define TARGET_HAVE_DMB (arm_arch6m || arm_arch7) + +/* Nonzero if this chip implements a memory barrier via CP15. */ +#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ + && ! TARGET_THUMB1) + +/* Nonzero if this chip implements a memory barrier instruction. */ +#define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) + +/* Nonzero if this chip supports ldrex and strex */ +#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + +/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ +#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) + +/* Nonzero if this chip supports ldrexd and strexd. */ +#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ + && arm_arch_notm) + +/* Nonzero if this chip supports load-acquire and store-release. */ +#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) + +/* Nonzero if integer division instructions supported. */ +#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ + || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) + +/* Should NEON be used for 64-bits bitops. */ +#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + +/* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, + etc., in addition to just the AAPCS calling conventions. */ +#ifndef TARGET_BPABI +#define TARGET_BPABI false +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-arch is ignored if -march or -mcpu are specified. + --with-cpu is ignored if -march or -mcpu are specified, and is overridden + by --with-arch. + --with-tune is ignored if -mtune or -mcpu are specified (but not affected + by -march). + --with-float is ignored if -mfloat-abi is specified. + --with-fpu is ignored if -mfpu is specified. + --with-abi is ignored if -mabi is specified. + --with-tls is ignored if -mtls-dialect is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \ + {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \ + {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }, \ + {"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \ + {"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \ + {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"}, \ + {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"}, + +/* Which floating point model to use. */ +enum arm_fp_model +{ + ARM_FP_MODEL_UNKNOWN, + /* VFP floating point model. */ + ARM_FP_MODEL_VFP +}; + +enum vfp_reg_type +{ + VFP_NONE = 0, + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE +}; + +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; + int crypto; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. 
*/ +extern int arm_fpu_attr; + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT +#endif + +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + +#ifndef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_APCS +#endif + +/* Map each of the micro-architecture variants to their corresponding + major architecture revision. */ + +enum base_architecture +{ + BASE_ARCH_0 = 0, + BASE_ARCH_2 = 2, + BASE_ARCH_3 = 3, + BASE_ARCH_3M = 3, + BASE_ARCH_4 = 4, + BASE_ARCH_4T = 4, + BASE_ARCH_5 = 5, + BASE_ARCH_5E = 5, + BASE_ARCH_5T = 5, + BASE_ARCH_5TE = 5, + BASE_ARCH_5TEJ = 5, + BASE_ARCH_6 = 6, + BASE_ARCH_6J = 6, + BASE_ARCH_6ZK = 6, + BASE_ARCH_6K = 6, + BASE_ARCH_6T2 = 6, + BASE_ARCH_6M = 6, + BASE_ARCH_6Z = 6, + BASE_ARCH_7 = 7, + BASE_ARCH_7A = 7, + BASE_ARCH_7R = 7, + BASE_ARCH_7M = 7, + BASE_ARCH_7EM = 7, + BASE_ARCH_8A = 8 +}; + +/* The major revision number of the ARM Architecture implemented by the target. */ +extern enum base_architecture arm_base_arch; + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +extern int arm_arch3m; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +extern int arm_arch4; + +/* Nonzero if this chip supports the ARM Architecture 4T extensions. */ +extern int arm_arch4t; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +extern int arm_arch5; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +extern int arm_arch5e; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +extern int arm_arch6; + +/* Nonzero if this chip supports the ARM Architecture 6k extensions. */ +extern int arm_arch6k; + +/* Nonzero if instructions present in ARMv6-M can be used. */ +extern int arm_arch6m; + +/* Nonzero if this chip supports the ARM Architecture 7 extensions. */ +extern int arm_arch7; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +extern int arm_arch_notm; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +extern int arm_arch7em; + +/* Nonzero if this chip supports the ARM Architecture 8 extensions. */ +extern int arm_arch8; + +/* Nonzero if this chip can benefit from load scheduling. */ +extern int arm_ld_sched; + +/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */ +extern int thumb_code; + +/* Nonzero if generating Thumb-1 code. */ +extern int thumb1_code; + +/* Nonzero if this chip is a StrongARM. */ +extern int arm_tune_strongarm; + +/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */ +extern int arm_arch_iwmmxt; + +/* Nonzero if this chip supports Intel Wireless MMX2 technology. */ +extern int arm_arch_iwmmxt2; + +/* Nonzero if this chip is an XScale. */ +extern int arm_arch_xscale; + +/* Nonzero if tuning for XScale. */ +extern int arm_tune_xscale; + +/* Nonzero if tuning for stores via the write buffer. */ +extern int arm_tune_wbuf; + +/* Nonzero if tuning for Cortex-A9. */ +extern int arm_tune_cortex_a9; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. */ +extern int arm_cpp_interwork; + +/* Nonzero if chip supports Thumb 2. */ +extern int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction in ARM mode. 
*/ +extern int arm_arch_arm_hwdiv; + +/* Nonzero if chip supports integer division instruction in Thumb mode. */ +extern int arm_arch_thumb_hwdiv; + +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +extern int prefer_neon_for_64bits; + +/* Nonzero if we shouldn't use literal pools. */ +#ifndef USED_FOR_TARGET +extern bool arm_disable_literal_pool; +#endif + +/* Nonzero if chip supports the ARMv8 CRC instructions. */ +extern int arm_arch_crc; + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +/* Nonzero if PIC code requires explicit qualifiers to generate + PLT and GOT relocs rather than the assembler doing so implicitly. + Subtargets can override these if required. */ +#ifndef NEED_GOT_RELOC +#define NEED_GOT_RELOC 0 +#endif +#ifndef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 +#endif + +#ifndef TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE +#define TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE 1 +#endif + +/* Nonzero if we need to refer to the GOT with a PC-relative + offset. In other words, generate + + .word _GLOBAL_OFFSET_TABLE_ - [. - (.Lxx + 8)] + + rather than + + .word _GLOBAL_OFFSET_TABLE_ - (.Lxx + 8) + + The default is true, which matches NetBSD. Subtargets can + override this if required. */ +#ifndef GOT_PCREL +#define GOT_PCREL 1 +#endif + +/* Target machine storage Layout. */ + + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +/* It is far faster to zero extend chars than to sign extend them */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 1; \ + (MODE) = SImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + Most ARM processors are run in little endian mode, so that is the default. + If you want to have it run-time selectable, change the definition in a + cover file to be TARGET_BIG_ENDIAN. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. + This is always false, even when in big-endian mode. */ +#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN && ! TARGET_LITTLE_WORDS) + +#define UNITS_PER_WORD 4 + +/* True if natural alignment is used for doubleword types. */ +#define ARM_DOUBLEWORD_ALIGN TARGET_AAPCS_BASED + +#define DOUBLEWORD_ALIGNMENT 64 + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) + +#define PREFERRED_STACK_BOUNDARY \ + (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY) + +#define FUNCTION_BOUNDARY ((TARGET_THUMB && optimize_size) ? 16 : 32) + +/* The lowest bit is used to indicate Thumb-mode functions, so the + vbit must go into the delta field of pointers to member + functions. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +#define EMPTY_FIELD_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? 
DOUBLEWORD_ALIGNMENT : 32) + +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + +/* XXX Blah -- this macro is used directly by libobjc. Since it + supports no vector modes, cut out the complexity and fall back + on BIGGEST_FIELD_ALIGNMENT. */ +#ifdef IN_TARGET_LIBS +#define BIGGEST_FIELD_ALIGNMENT 64 +#endif + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT_FACTOR (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && !optimize_size \ + && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR) \ + ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN)) + +/* Align definitions of arrays, unions and structures so that + initializations and copies can be made more efficient. This is not + ABI-changing, so it only affects places where we can see the + definition. Increasing the alignment tends to introduce padding, + so don't do this when optimizing for size/conserving stack space. */ +#define ARM_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Align global data. */ +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!optimize_size, EXP, ALIGN) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!flag_conserve_stack, EXP, ALIGN) + +/* Setting STRUCTURE_SIZE_BOUNDARY to 32 produces more efficient code, but the + value set in previous versions of this toolchain was 8, which produces more + compact structures. The command line option -mstructure_size_boundary= + can be used to change this value. For compatibility with the ARM SDK + however the value should be left at 32. ARM SDT Reference Manual (ARM DUI + 0020D) page 2-20 says "Structures are aligned on word boundaries". + The AAPCS specifies a value of 8. */ +#define STRUCTURE_SIZE_BOUNDARY arm_structure_size_boundary + +/* This is the value used to initialize arm_structure_size_boundary. If a + particular arm target wants to change the default value it should change + the definition of this macro, not STRUCTURE_SIZE_BOUNDARY. See netbsd.h + for an example of this. */ +#ifndef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 32 +#endif + +/* Nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* wchar_t is unsigned under the AAPCS. */ +#ifndef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "int") + +#define WCHAR_TYPE_SIZE BITS_PER_WORD +#endif + +/* Sized for fixed-point types. */ + +#define SHORT_FRACT_TYPE_SIZE 8 +#define FRACT_TYPE_SIZE 16 +#define LONG_FRACT_TYPE_SIZE 32 +#define LONG_LONG_FRACT_TYPE_SIZE 64 + +#define SHORT_ACCUM_TYPE_SIZE 16 +#define ACCUM_TYPE_SIZE 32 +#define LONG_ACCUM_TYPE_SIZE 64 +#define LONG_LONG_ACCUM_TYPE_SIZE 64 + +#define MAX_FIXED_MODE_SIZE 64 + +#ifndef SIZE_TYPE +#define SIZE_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long unsigned int") +#endif + +#ifndef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int") +#endif + +/* AAPCS requires that structure alignment is affected by bitfields. */ +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS TARGET_AAPCS_BASED +#endif + + +/* Standard register usage. 
*/ + +/* Register allocation in ARM Procedure Call Standard + (S - saved over call). + + r0 * argument word/integer result + r1-r3 argument word + + r4-r8 S register variable + r9 S (rfp) register variable (real frame pointer) + + r10 F S (sl) stack limit (used by -mapcs-stack-check) + r11 F S (fp) argument pointer + r12 (ip) temp workspace + r13 F S (sp) lower end of current stack frame + r14 (lr) link address/workspace + r15 F (pc) program counter + + cc This is NOT a real register, but is used internally + to represent things that use or set the condition + codes. + sfp This isn't either. It is used during rtl generation + since the offset between the frame pointer and the + auto's isn't known until after register allocation. + afp Nor this, we only need this because of non-local + goto. Without it fp appears to be used and the + elimination code won't get rid of sfp. It tracks + fp exactly at all times. + + *: See TARGET_CONDITIONAL_REGISTER_USAGE */ + +/* s0-s15 VFP scratch (aka d0-d7). + s16-s31 S VFP variable (aka d8-d15). + vfpcc Not a real register. Represents the VFP condition + code flags. */ + +/* The stack backtrace structure is as follows: + fp points to here: | save code pointer | [fp] + | return link value | [fp, #-4] + | return sp value | [fp, #-8] + | return fp value | [fp, #-12] + [| saved r10 value |] + [| saved r9 value |] + [| saved r8 value |] + [| saved r7 value |] + [| saved r6 value |] + [| saved r5 value |] + [| saved r4 value |] + [| saved r3 value |] + [| saved r2 value |] + [| saved r1 value |] + [| saved r0 value |] + r0-r3 are not normally saved in a C function. */ + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ +#define FIXED_REGISTERS \ +{ \ + /* Core regs. */ \ + 0,0,0,0,0,0,0,0, \ + 0,0,0,0,0,1,0,1, \ + /* VFP regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + /* IWMMXT regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + /* Specials. */ \ + 1,1,1,1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. + The CC is not preserved over function calls on the ARM 6, so it is + easier to assume this for all. SFP is preserved, since FP is. */ +#define CALL_USED_REGISTERS \ +{ \ + /* Core regs. */ \ + 1,1,1,1,0,0,0,0, \ + 0,0,0,0,1,1,1,1, \ + /* VFP Regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + /* IWMMXT regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + /* Specials. 
*/ \ + 1,1,1,1 \ +} + +#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE +#define SUBTARGET_CONDITIONAL_REGISTER_USAGE +#endif + +/* These are a couple of extensions to the formats accepted + by asm_fprintf: + %@ prints out ASM_COMMENT_START + %r prints out REGISTER_PREFIX reg_names[arg] */ +#define ASM_FPRINTF_EXTENSIONS(FILE, ARGS, P) \ + case '@': \ + fputs (ASM_COMMENT_START, FILE); \ + break; \ + \ + case 'r': \ + fputs (REGISTER_PREFIX, FILE); \ + fputs (reg_names [va_arg (ARGS, int)], FILE); \ + break; + +/* Round X up to the nearest word. */ +#define ROUND_UP_WORD(X) (((X) + 3) & ~3) + +/* Convert fron bytes to ints. */ +#define ARM_NUM_INTS(X) (((X) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The number of (integer) registers required to hold a quantity of type MODE. + Also used for VFP registers. */ +#define ARM_NUM_REGS(MODE) \ + ARM_NUM_INTS (GET_MODE_SIZE (MODE)) + +/* The number of (integer) registers required to hold a quantity of TYPE MODE. */ +#define ARM_NUM_REGS2(MODE, TYPE) \ + ARM_NUM_INTS ((MODE) == BLKmode ? \ + int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) + +/* The number of (integer) argument register available. */ +#define NUM_ARG_REGS 4 + +/* And similarly for the VFP. */ +#define NUM_VFP_ARG_REGS 16 + +/* Return the register number of the N'th (integer) argument. */ +#define ARG_REGISTER(N) (N - 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* The number of the last argument register. */ +#define LAST_ARG_REGNUM ARG_REGISTER (NUM_ARG_REGS) + +/* The numbers of the Thumb register ranges. */ +#define FIRST_LO_REGNUM 0 +#define LAST_LO_REGNUM 7 +#define FIRST_HI_REGNUM 8 +#define LAST_HI_REGNUM 11 + +/* Overridden by config/arm/bpabi.h. */ +#ifndef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 0 +#endif + +/* Use r0 and r1 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? N : INVALID_REGNUM) + +/* The register that holds the return address in exception handlers. */ +#define ARM_EH_STACKADJ_REGNUM 2 +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM) + +#ifndef ARM_TARGET2_DWARF_FORMAT +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_pcrel + +/* ttype entries (the only interesting data references used) + use TARGET2 relocations. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ + (((code) == 0 && (data) == 1 && ARM_UNWIND_INFO) ? ARM_TARGET2_DWARF_FORMAT \ + : DW_EH_PE_absptr) +#endif + +/* The native (Norcroft) Pascal compiler for the ARM passes the static chain + as an invisible last argument (possible since varargs don't exist in + Pascal), so the following is not true. */ +#define STATIC_CHAIN_REGNUM 12 + +/* Define this to be where the real frame pointer is if it is not possible to + work out the offset between the frame pointer and the automatic variables + until after register allocation has taken place. FRAME_POINTER_REGNUM + should point to a special register that we will make sure is eliminated. + + For the Thumb we have another problem. The TPCS defines the frame pointer + as r11, and GCC believes that it is always possible to use the frame pointer + as base register for addressing purposes. (See comments in + find_reloads_address()). But - the Thumb does not allow high registers, + including r11, to be used as base address registers. Hence our problem. 
+ + The solution used here, and in the old thumb port is to use r7 instead of + r11 as the hard frame pointer and to have special code to generate + backtrace structures on the stack (if required to do so via a command line + option) using r11. This is the only 'user visible' use of r11 as a frame + pointer. */ +#define ARM_HARD_FRAME_POINTER_REGNUM 11 +#define THUMB_HARD_FRAME_POINTER_REGNUM 7 + +#define HARD_FRAME_POINTER_REGNUM \ + (TARGET_ARM \ + ? ARM_HARD_FRAME_POINTER_REGNUM \ + : THUMB_HARD_FRAME_POINTER_REGNUM) + +#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 +#define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +#define FP_REGNUM HARD_FRAME_POINTER_REGNUM + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM SP_REGNUM + +#define FIRST_IWMMXT_REGNUM (LAST_HI_VFP_REGNUM + 1) +#define LAST_IWMMXT_REGNUM (FIRST_IWMMXT_REGNUM + 15) + +/* Need to sync with WCGR in iwmmxt.md. */ +#define FIRST_IWMMXT_GR_REGNUM (LAST_IWMMXT_REGNUM + 1) +#define LAST_IWMMXT_GR_REGNUM (FIRST_IWMMXT_GR_REGNUM + 3) + +#define IS_IWMMXT_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM)) +#define IS_IWMMXT_GR_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM)) + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 102 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 103 + +#define FIRST_VFP_REGNUM 16 +#define D7_VFP_REGNUM (FIRST_VFP_REGNUM + 15) +#define LAST_VFP_REGNUM \ + (TARGET_VFPD32 ? LAST_HI_VFP_REGNUM : LAST_LO_VFP_REGNUM) + +#define IS_VFP_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_VFP_REGNUM) && ((REGNUM) <= LAST_VFP_REGNUM)) + +/* VFP registers are split into two types: those defined by VFP versions < 3 + have D registers overlaid on consecutive pairs of S registers. VFP version 3 + defines 16 new D registers (d16-d31) which, for simplicity and correctness + in various parts of the backend, we implement as "fake" single-precision + registers (which would be S32-S63, but cannot be used in that way). The + following macros define these ranges of registers. */ +#define LAST_LO_VFP_REGNUM (FIRST_VFP_REGNUM + 31) +#define FIRST_HI_VFP_REGNUM (LAST_LO_VFP_REGNUM + 1) +#define LAST_HI_VFP_REGNUM (FIRST_HI_VFP_REGNUM + 31) + +#define VFP_REGNO_OK_FOR_SINGLE(REGNUM) \ + ((REGNUM) <= LAST_LO_VFP_REGNUM) + +/* DFmode values are only valid in even register pairs. */ +#define VFP_REGNO_OK_FOR_DOUBLE(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 1) == 0) + +/* Neon Quad values must start at a multiple of four registers. */ +#define NEON_REGNO_OK_FOR_QUAD(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0) + +/* Neon structures of vectors must be in even register pairs and there + must be enough registers available. Because of various patterns + requiring quad registers, we require them to start at a multiple of + four. */ +#define NEON_REGNO_OK_FOR_NREGS(REGNUM, N) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \ + && (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1)) + +/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP. */ +/* Intel Wireless MMX Technology registers add 16 + 4 more. */ +/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */ +#define FIRST_PSEUDO_REGISTER 104 + +#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO) + +/* Value should be nonzero if functions must have frame pointers. 
+ Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. + If we have to have a frame pointer we might as well make use of it. + APCS says that the frame pointer does not need to be pushed in leaf + functions, or simple tail call functions. */ + +#ifndef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 +#endif + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the ARM core regs are UNITS_PER_WORD bits wide. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((TARGET_32BIT \ + && REGNO > PC_REGNUM \ + && REGNO != FRAME_POINTER_REGNUM \ + && REGNO != ARG_POINTER_REGNUM) \ + && !IS_VFP_REGNUM (REGNO) \ + ? 1 : ARM_NUM_REGS (MODE)) + +/* Return true if REGNO is suitable for holding a quantity of type MODE. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + arm_hard_regno_mode_ok ((REGNO), (MODE)) + +#define MODES_TIEABLE_P(MODE1, MODE2) arm_modes_tieable_p (MODE1, MODE2) + +#define VALID_IWMMXT_REG_MODE(MODE) \ + (arm_vector_mode_supported_p (MODE) || (MODE) == DImode) + +/* Modes valid for Neon D registers. */ +#define VALID_NEON_DREG_MODE(MODE) \ + ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ + || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode) + +/* Modes valid for Neon Q registers. */ +#define VALID_NEON_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V2DImode) + +/* Structure modes valid for Neon registers. */ +#define VALID_NEON_STRUCT_MODE(MODE) \ + ((MODE) == TImode || (MODE) == EImode || (MODE) == OImode \ + || (MODE) == CImode || (MODE) == XImode) + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +extern int arm_regs_in_sequence[]; + +/* The order in which register should be allocated. It is good to use ip + since no saving is required (though calls clobber it) and it never contains + function parameters. It is quite good to use lr since other calls may + clobber it anyway. Allocate r0 through r3 in reverse order since r3 is + least likely to contain a function parameter; in addition results are + returned in r0. + For VFP/VFPv3, allocate D16-D31 first, then caller-saved registers (D0-D7), + then D8-D15. The reason for doing this is to attempt to reduce register + pressure when both single- and double-precision registers are used in a + function. */ + +#define VREG(X) (FIRST_VFP_REGNUM + (X)) +#define WREG(X) (FIRST_IWMMXT_REGNUM + (X)) +#define WGREG(X) (FIRST_IWMMXT_GR_REGNUM + (X)) + +#define REG_ALLOC_ORDER \ +{ \ + /* General registers. */ \ + 3, 2, 1, 0, 12, 14, 4, 5, \ + 6, 7, 8, 9, 10, 11, \ + /* High VFP registers. */ \ + VREG(32), VREG(33), VREG(34), VREG(35), \ + VREG(36), VREG(37), VREG(38), VREG(39), \ + VREG(40), VREG(41), VREG(42), VREG(43), \ + VREG(44), VREG(45), VREG(46), VREG(47), \ + VREG(48), VREG(49), VREG(50), VREG(51), \ + VREG(52), VREG(53), VREG(54), VREG(55), \ + VREG(56), VREG(57), VREG(58), VREG(59), \ + VREG(60), VREG(61), VREG(62), VREG(63), \ + /* VFP argument registers. */ \ + VREG(15), VREG(14), VREG(13), VREG(12), \ + VREG(11), VREG(10), VREG(9), VREG(8), \ + VREG(7), VREG(6), VREG(5), VREG(4), \ + VREG(3), VREG(2), VREG(1), VREG(0), \ + /* VFP call-saved registers. 
*/ \ + VREG(16), VREG(17), VREG(18), VREG(19), \ + VREG(20), VREG(21), VREG(22), VREG(23), \ + VREG(24), VREG(25), VREG(26), VREG(27), \ + VREG(28), VREG(29), VREG(30), VREG(31), \ + /* IWMMX registers. */ \ + WREG(0), WREG(1), WREG(2), WREG(3), \ + WREG(4), WREG(5), WREG(6), WREG(7), \ + WREG(8), WREG(9), WREG(10), WREG(11), \ + WREG(12), WREG(13), WREG(14), WREG(15), \ + WGREG(0), WGREG(1), WGREG(2), WGREG(3), \ + /* Registers not for general use. */ \ + CC_REGNUM, VFPCC_REGNUM, \ + FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \ + SP_REGNUM, PC_REGNUM \ +} + +/* Use different register alloc ordering for Thumb. */ +#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc () + +/* Tell IRA to use the order we define rather than messing it up with its + own cost calculations. */ +#define HONOR_REG_ALLOC_ORDER + +/* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. */ +#define HARD_REGNO_RENAME_OK(SRC, DST) \ + (! IS_INTERRUPT (cfun->machine->func_type) || \ + df_regs_ever_live_p (DST)) + +/* Register and constant classes. */ + +/* Register classes. */ +enum reg_class +{ + NO_REGS, + LO_REGS, + STACK_REG, + BASE_REGS, + HI_REGS, + CALLER_SAVE_REGS, + GENERAL_REGS, + CORE_REGS, + VFP_D0_D7_REGS, + VFP_LO_REGS, + VFP_HI_REGS, + VFP_REGS, + IWMMXT_REGS, + IWMMXT_GR_REGS, + CC_REG, + VFPCC_REG, + SFP_REG, + AFP_REG, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "LO_REGS", \ + "STACK_REG", \ + "BASE_REGS", \ + "HI_REGS", \ + "CALLER_SAVE_REGS", \ + "GENERAL_REGS", \ + "CORE_REGS", \ + "VFP_D0_D7_REGS", \ + "VFP_LO_REGS", \ + "VFP_HI_REGS", \ + "VFP_REGS", \ + "IWMMXT_REGS", \ + "IWMMXT_GR_REGS", \ + "CC_REG", \ + "VFPCC_REG", \ + "SFP_REG", \ + "AFP_REG", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \ + { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \ + { 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \ + { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ + { 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \ + { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \ + { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \ + { 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000 }, /* VFP_LO_REGS */ \ + { 0x00000000, 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_HI_REGS */ \ + { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF, 0x00000000 }, /* VFP_REGS */ \ + { 0x00000000, 0x00000000, 0xFFFF0000, 0x00000000 }, /* IWMMXT_REGS */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000F }, /* IWMMXT_GR_REGS */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000010 }, /* CC_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000020 }, /* VFPCC_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000040 }, /* SFP_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000080 }, /* AFP_REG */ \ + { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F } /* ALL_REGS */ \ +} + +/* Any of the VFP register classes. 
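+
+   In the REG_CLASS_CONTENTS table above, each 32-bit word covers 32
+   consecutive hard register numbers, so with FIRST_VFP_REGNUM == 16 the
+   VFP_D0_D7_REGS mask { 0xFFFF0000, 0, 0, 0 } selects hard registers
+   16-31, i.e. s0-s15 (overlaid on d0-d7).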
*/ +#define IS_VFP_CLASS(X) \ + ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \ + || (X) == VFP_HI_REGS || (X) == VFP_REGS) + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +#define REGNO_REG_CLASS(REGNO) arm_regno_class (REGNO) + +/* In VFPv1, VFP registers could only be accessed in the mode they + were set, so subregs would be invalid there. However, we don't + support VFPv1 at the moment, and the restriction was lifted in + VFPv2. + In big-endian mode, modes greater than word size (i.e. DFmode) are stored in + VFP registers in little-endian order. We can't describe that accurately to + GCC, so avoid taking subregs of such values. + The only exception is going from a 128-bit to a 64-bit type. In that case + the data layout happens to be consistent for big-endian, so we explicitly allow + that case. */ +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (TARGET_VFP && TARGET_BIG_END \ + && !(GET_MODE_SIZE (FROM) == 16 && GET_MODE_SIZE (TO) == 8) \ + && (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \ + || GET_MODE_SIZE (TO) > UNITS_PER_WORD) \ + && reg_classes_intersect_p (VFP_REGS, (CLASS))) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS (TARGET_THUMB1 ? LO_REGS : GENERAL_REGS) +#define BASE_REG_CLASS (TARGET_THUMB1 ? LO_REGS : CORE_REGS) + +/* For the Thumb the high registers cannot be used as base registers + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ +#define MODE_BASE_REG_CLASS(MODE) \ + (arm_lra_flag \ + ? (TARGET_32BIT ? CORE_REGS \ + : GET_MODE_SIZE (MODE) >= 4 ? BASE_REGS \ + : LO_REGS) \ + : ((TARGET_ARM || (TARGET_THUMB2 && !optimize_size)) ? CORE_REGS \ + : ((MODE) == SImode) ? BASE_REGS \ + : LO_REGS)) + +/* For Thumb we can not support SP+reg addressing, so we return LO_REGS + instead of BASE_REGS. */ +#define MODE_BASE_REG_REG_CLASS(MODE) BASE_REG_CLASS + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + arm_small_register_classes_for_mode_p + +/* Must leave BASE_REGS reloads alone */ +#define THUMB_SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + (lra_in_progress ? NO_REGS \ + : ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS)) + +#define THUMB_SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + (lra_in_progress ? NO_REGS \ + : (CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS) + +/* Return the register class of a scratch register needed to copy IN into + or out of a register in CLASS in MODE. If it can be done directly, + NO_REGS is returned. */ +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) \ + : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \ + ? coproc_secondary_reload_class (MODE, X, TRUE) \ + : TARGET_32BIT \ + ? (((MODE) == HImode && ! 
arm_arch4 && true_regnum (X) == -1) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)) + +/* If we need to load shorts byte-at-a-time, then we need a scratch. */ +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) : \ + (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \ + coproc_secondary_reload_class (MODE, X, TRUE) : \ + (TARGET_32BIT ? \ + (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \ + && CONSTANT_P (X)) \ + ? GENERAL_REGS : \ + (((MODE) == HImode && ! arm_arch4 \ + && (MEM_P (X) \ + || ((REG_P (X) || GET_CODE (X) == SUBREG) \ + && true_regnum (X) == -1))) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. + + For the ARM, we wish to handle large displacements off a base + register by splitting the addend across a MOV and the mem insn. + This can cut the number of reloads needed. */ +#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ + do \ + { \ + if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ + goto WIN; \ + } \ + while (0) + +/* XXX If an HImode FP+large_offset address is converted to an HImode + SP+large_offset address, then reload won't know how to fix it. It sees + only that SP isn't valid for HImode, and so reloads the SP into an index + register, but the resulting address is still invalid because the offset + is too big. We fix it here instead by reloading the entire address. */ +/* We could probably achieve better results by defining PROMOTE_MODE to help + cope with the variances between the Thumb's signed and unsigned byte and + halfword load instructions. */ +/* ??? This should be safe for thumb2, but we may be able to do better. */ +#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +do { \ + rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ + if (TARGET_ARM) \ + ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ + else \ + THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + ARM regs are UNITS_PER_WORD bits. + FIXME: Is this true for iWMMX? */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (ARM_NUM_REGS (MODE)) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* The amount of scratch space needed by _interwork_{r7,r11}_call_via_rN(). 
+ When present, it is one word in size, and sits at the top of the frame, + between the soft frame pointer and either r7 or r11. + + We only need _interwork_rM_call_via_rN() for -mcaller-super-interworking, + and only then if some outgoing arguments are passed on the stack. It would + be tempting to also check whether the stack arguments are passed by indirect + calls, but there seems to be no reason in principle why a post-reload pass + couldn't convert a direct call into an indirect one. */ +#define CALLER_INTERWORKING_SLOT_SIZE \ + (TARGET_CALLER_INTERWORKING \ + && crtl->outgoing_args_size != 0 \ + ? UNITS_PER_WORD : 0) + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* The push insns do not do this rounding implicitly. + So don't define this. */ +/* #define PUSH_ROUNDING(NPUSHED) ROUND_UP_WORD (NPUSHED) */ + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (TARGET_ARM ? 4 : 0) + +/* Amount of memory needed for an untyped call to save all possible return + registers. */ +#define APPLY_RESULT_SIZE arm_apply_result_size() + +/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return + values must be in memory. On the ARM, they need only do so if larger + than a word, or if they contain elements offset from zero in the struct. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* These bits describe the different types of function supported + by the ARM backend. They are exclusive. i.e. a function cannot be both a + normal function and an interworked function, for example. Knowing the + type of a function is important for determining its prologue and + epilogue sequences. + Note value 7 is currently unassigned. Also note that the interrupt + function types all have bit 2 set, so that they can be tested for easily. + Note that 0 is deliberately chosen for ARM_FT_UNKNOWN so that when the + machine_function structure is initialized (to zero) func_type will + default to unknown. This will force the first use of arm_current_func_type + to call arm_compute_func_type. */ +#define ARM_FT_UNKNOWN 0 /* Type has not yet been determined. */ +#define ARM_FT_NORMAL 1 /* Your normal, straightforward function. */ +#define ARM_FT_INTERWORKED 2 /* A function that supports interworking. */ +#define ARM_FT_ISR 4 /* An interrupt service routine. */ +#define ARM_FT_FIQ 5 /* A fast interrupt service routine. */ +#define ARM_FT_EXCEPTION 6 /* An ARM exception handler (subcase of ISR). */ + +#define ARM_FT_TYPE_MASK ((1 << 3) - 1) + +/* In addition functions can have several type modifiers, + outlined by these bit masks: */ +#define ARM_FT_INTERRUPT (1 << 2) /* Note overlap with FT_ISR and above. */ +#define ARM_FT_NAKED (1 << 3) /* No prologue or epilogue. */ +#define ARM_FT_VOLATILE (1 << 4) /* Does not return. */ +#define ARM_FT_NESTED (1 << 5) /* Embedded inside another func. */ +#define ARM_FT_STACKALIGN (1 << 6) /* Called with misaligned stack. */ + +/* Some macros to test these flags. 
*/ +#define ARM_FUNC_TYPE(t) (t & ARM_FT_TYPE_MASK) +#define IS_INTERRUPT(t) (t & ARM_FT_INTERRUPT) +#define IS_VOLATILE(t) (t & ARM_FT_VOLATILE) +#define IS_NAKED(t) (t & ARM_FT_NAKED) +#define IS_NESTED(t) (t & ARM_FT_NESTED) +#define IS_STACKALIGN(t) (t & ARM_FT_STACKALIGN) + + +/* Structure used to hold the function stack frame layout. Offsets are + relative to the stack pointer on function entry. Positive offsets are + in the direction of stack growth. + Only soft_frame is used in thumb mode. */ + +typedef struct GTY(()) arm_stack_offsets +{ + int saved_args; /* ARG_POINTER_REGNUM. */ + int frame; /* ARM_HARD_FRAME_POINTER_REGNUM. */ + int saved_regs; + int soft_frame; /* FRAME_POINTER_REGNUM. */ + int locals_base; /* THUMB_HARD_FRAME_POINTER_REGNUM. */ + int outgoing_args; /* STACK_POINTER_REGNUM. */ + unsigned int saved_regs_mask; +} +arm_stack_offsets; + +#ifndef GENERATOR_FILE +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +typedef struct GTY(()) machine_function +{ + /* Additional stack adjustment in __builtin_eh_throw. */ + rtx eh_epilogue_sp_ofs; + /* Records if LR has to be saved for far jumps. */ + int far_jump_used; + /* Records if ARG_POINTER was ever live. */ + int arg_pointer_live; + /* Records if the save of LR has been eliminated. */ + int lr_save_eliminated; + /* The size of the stack frame. Only valid after reload. */ + arm_stack_offsets stack_offsets; + /* Records the type of the current function. */ + unsigned long func_type; + /* Record if the function has a variable argument list. */ + int uses_anonymous_args; + /* Records if sibcalls are blocked because an argument + register is needed to preserve stack alignment. */ + int sibcall_blocked; + /* The PIC register for this function. This might be a pseudo. */ + rtx pic_reg; + /* Labels for per-function Thumb call-via stubs. One per potential calling + register. We can never call via LR or PC. We can call via SP if a + trampoline happens to be on the top of the stack. */ + rtx call_via[14]; + /* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ + int return_used_this_function; + /* When outputting Thumb-1 code, record the last insn that provides + information about condition codes, and the comparison operands. */ + rtx thumb1_cc_insn; + rtx thumb1_cc_op0; + rtx thumb1_cc_op1; + /* Also record the CC mode that is supported. */ + enum machine_mode thumb1_cc_mode; +} +machine_function; +#endif + +/* As in the machine_function, a global set of call-via labels, for code + that is in text_section. */ +extern GTY(()) rtx thumb_call_via_label[14]; + +/* The number of potential ways of assigning to a co-processor. */ +#define ARM_NUM_COPROC_SLOTS 1 + +/* Enumeration of procedure calling standard variants. We don't really + support all of these yet. */ +enum arm_pcs +{ + ARM_PCS_AAPCS, /* Base standard AAPCS. */ + ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */ + ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */ + /* This must be the last AAPCS variant. */ + ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */ + ARM_PCS_ATPCS, /* ATPCS. */ + ARM_PCS_APCS, /* APCS (legacy Linux etc). */ + ARM_PCS_UNKNOWN +}; + +/* Default procedure calling standard of current compilation unit. */ +extern enum arm_pcs arm_pcs_default; + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. 
*/ +typedef struct +{ + /* This is the number of registers of arguments scanned so far. */ + int nregs; + /* This is the number of iWMMXt register arguments scanned so far. */ + int iwmmxt_nregs; + int named_count; + int nargs; + /* Which procedure call variant to use for this call. */ + enum arm_pcs pcs_variant; + + /* AAPCS related state tracking. */ + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_cprc_slot; /* Index of co-processor rules to handle + this argument, or -1 if using core + registers. */ + int aapcs_ncrn; + int aapcs_next_ncrn; + rtx aapcs_reg; /* Register assigned to this argument. */ + int aapcs_partial; /* How many bytes are passed in regs (if + split between core regs and stack. + Zero otherwise. */ + int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS]; + int can_split; /* Argument can be split between core regs + and the stack. */ + /* Private data for tracking VFP register allocation */ + unsigned aapcs_vfp_regs_free; + unsigned aapcs_vfp_reg_alloc; + int aapcs_vfp_rcount; + MACHMODE aapcs_vfp_rmode; +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + (arm_pad_arg_upward (MODE, TYPE) ? upward : downward) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (arm_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward) + +/* For AAPCS, padding should never be below the argument. For other ABIs, + * mimic the default. */ +#define PAD_VARARGS_DOWN \ + ((TARGET_AAPCS_BASED) ? 0 : BYTES_BIG_ENDIAN) + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + On the ARM, the offset starts at 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + arm_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL)) + +/* 1 if N is a possible register number for function argument passing. + On the ARM, r0-r3 are used to pass args. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (IN_RANGE ((REGNO), 0, 3) \ + || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \ + && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \ + || (TARGET_IWMMXT_ABI \ + && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9))) + + +/* If your target environment doesn't prefix user functions with an + underscore, you may wish to re-define this to prevent any conflicts. */ +#ifndef ARM_MCOUNT_NAME +#define ARM_MCOUNT_NAME "*mcount" +#endif + +/* Call the function profiler with a given profile label. The Acorn + compiler puts this BEFORE the prolog but gcc puts it afterwards. + On the ARM the full profile code will look like: + .data + LP1 + .word 0 + .text + mov ip, lr + bl mcount + .word LP1 + + profile_function() in final.c outputs the .data section, FUNCTION_PROFILER + will output the .text section. + + The ``mov ip,lr'' seems like a good idea to stick with cc convention. + ``prof'' doesn't seem to mind about this! + + Note - this version of the code is designed to work in both ARM and + Thumb modes. 
*/ +#ifndef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + char temp[20]; \ + rtx sym; \ + \ + asm_fprintf (STREAM, "\tmov\t%r, %r\n\tbl\t", \ + IP_REGNUM, LR_REGNUM); \ + assemble_name (STREAM, ARM_MCOUNT_NAME); \ + fputc ('\n', STREAM); \ + ASM_GENERATE_INTERNAL_LABEL (temp, "LP", LABELNO); \ + sym = gen_rtx_SYMBOL_REF (Pmode, temp); \ + assemble_aligned_integer (UNITS_PER_WORD, sym); \ +} +#endif + +#ifdef THUMB_FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + if (TARGET_ARM) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) \ + else \ + THUMB_FUNCTION_PROFILER (STREAM, LABELNO) +#else +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) +#endif + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. + + On the ARM, the function epilogue recovers the stack pointer from the + frame. */ +#define EXIT_IGNORE_STACK 1 + +#define EPILOGUE_USES(REGNO) (epilogue_completed && (REGNO) == LR_REGNUM) + +/* Determine if the epilogue should be output as RTL. + You should override this if you define FUNCTION_EXTRA_EPILOGUE. */ +#define USE_RETURN_INSN(ISCOND) \ + (TARGET_32BIT ? use_return_insn (ISCOND, NULL) : 0) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARM. First, the + arg pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the pseudo frame pointer register can always + be eliminated; it is replaced with either the stack or the real frame + pointer. Note we have to use {ARM|THUMB}_HARD_FRAME_POINTER_REGNUM + because the definition of HARD_FRAME_POINTER_REGNUM is not a constant. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM }} + +/* Define the offset between two registers, one to be eliminated, and the + other its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + if (TARGET_ARM) \ + (OFFSET) = arm_compute_initial_elimination_offset (FROM, TO); \ + else \ + (OFFSET) = thumb_compute_initial_elimination_offset (FROM, TO) + +/* Special case handling of the location of arguments passed on the stack. */ +#define DEBUGGER_ARG_OFFSET(value, addr) value ? value : arm_debugger_arg_offset (value, addr) + +/* Initialize data used by insn expanders. This is called from insn_emit, + once for every function before code is generated. */ +#define INIT_EXPANDERS arm_init_expanders () + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE (TARGET_32BIT ? 16 : 20) + +/* Alignment required for a trampoline in bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/* Addressing modes, and classification of registers for them. 
*/ +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_INCREMENT TARGET_32BIT +#define HAVE_POST_DECREMENT TARGET_32BIT +#define HAVE_PRE_DECREMENT TARGET_32BIT +#define HAVE_PRE_MODIFY_DISP TARGET_32BIT +#define HAVE_POST_MODIFY_DISP TARGET_32BIT +#define HAVE_PRE_MODIFY_REG TARGET_32BIT +#define HAVE_POST_MODIFY_REG TARGET_32BIT + +enum arm_auto_incmodes + { + ARM_POST_INC, + ARM_PRE_INC, + ARM_POST_DEC, + ARM_PRE_DEC + }; + +#define ARM_AUTOINC_VALID_FOR_MODE_P(mode, code) \ + (TARGET_32BIT && arm_autoinc_modes_ok_p (mode, code)) +#define USE_LOAD_POST_INCREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_POST_INC) +#define USE_LOAD_PRE_INCREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_PRE_INC) +#define USE_LOAD_POST_DECREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_POST_DEC) +#define USE_LOAD_PRE_DECREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_PRE_DEC) + +#define USE_STORE_PRE_DECREMENT(mode) USE_LOAD_PRE_DECREMENT(mode) +#define USE_STORE_PRE_INCREMENT(mode) USE_LOAD_PRE_INCREMENT(mode) +#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode) +#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode) + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ +#define TEST_REGNO(R, TEST, VALUE) \ + ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + +/* Don't allow the pc to be used. */ +#define ARM_REGNO_OK_FOR_BASE_P(REGNO) \ + (TEST_REGNO (REGNO, <, PC_REGNUM) \ + || TEST_REGNO (REGNO, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (REGNO, ==, ARG_POINTER_REGNUM)) + +#define THUMB1_REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TEST_REGNO (REGNO, <=, LAST_LO_REGNUM) \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM))) + +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO, MODE) \ + : ARM_REGNO_OK_FOR_BASE_P (REGNO)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REGNO_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REGNO_MODE_OK_FOR_BASE_P (X, QImode) + +/* For ARM code, we don't care about the mode, but for Thumb, the index + must be suitable for use in a QImode load. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (REGNO_MODE_OK_FOR_BASE_P (REGNO, QImode) \ + && !TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM)) + +/* Maximum number of registers that can appear in a valid memory address. + Shifts in addresses can't be by a register. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ +/* XXX We can address any constant, eventually... */ +/* ??? Should the TARGET_ARM here also apply to thumb2? */ +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == SYMBOL_REF \ + && (CONSTANT_POOL_ADDRESS_P (X) \ + || (TARGET_ARM && optimize > 0 && SYMBOL_REF_FLAG (X)))) + +/* True if SYMBOL + OFFSET constants must refer to something within + SYMBOL's section. */ +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0 + +/* Nonzero if all target requires all absolute relocations be R_ARM_ABS32. 
*/ +#ifndef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 0 +#endif + +#ifndef SUBTARGET_NAME_ENCODING_LENGTHS +#define SUBTARGET_NAME_ENCODING_LENGTHS +#endif + +/* This is a C fragment for the inside of a switch statement. + Each case label should return the number of characters to + be stripped from the start of a function's name, if that + name starts with the indicated character. */ +#define ARM_NAME_ENCODING_LENGTHS \ + case '*': return 1; \ + SUBTARGET_NAME_ENCODING_LENGTHS + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + arm_asm_output_labelref (FILE, NAME) + +/* Output IT instructions for conditionally executed Thumb-2 instructions. */ +#define ASM_OUTPUT_OPCODE(STREAM, PTR) \ + if (TARGET_THUMB2) \ + thumb2_asm_output_opcode (STREAM); + +/* The EABI specifies that constructors should go in .init_array. + Other targets use .ctors for compatibility. */ +#ifndef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"aw\",%init_array" +#endif +#ifndef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"aw\",%fini_array" +#endif +#define ARM_CTORS_SECTION_OP \ + "\t.section\t.ctors,\"aw\",%progbits" +#define ARM_DTORS_SECTION_OP \ + "\t.section\t.dtors,\"aw\",%progbits" + +/* Define CTORS_SECTION_ASM_OP. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP +#ifndef IN_LIBGCC2 +# define CTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_CTORS_SECTION_OP : ARM_CTORS_SECTION_OP) +# define DTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_DTORS_SECTION_OP : ARM_DTORS_SECTION_OP) +#else /* !defined (IN_LIBGCC2) */ +/* In libgcc, CTORS_SECTION_ASM_OP must be a compile-time constant, + so we cannot use the definition above. */ +# ifdef __ARM_EABI__ +/* The .ctors section is not part of the EABI, so we do not define + CTORS_SECTION_ASM_OP when in libgcc; that prevents crtstuff + from trying to use it. We do define it when doing normal + compilation, as .init_array can be used instead of .ctors. */ +/* There is no need to emit begin or end markers when using + init_array; the dynamic linker will compute the size of the + array itself based on special symbols created by the static + linker. However, we do need to arrange to set up + exception-handling here. */ +# define CTOR_LIST_BEGIN asm (ARM_EABI_CTORS_SECTION_OP) +# define CTOR_LIST_END /* empty */ +# define DTOR_LIST_BEGIN asm (ARM_EABI_DTORS_SECTION_OP) +# define DTOR_LIST_END /* empty */ +# else /* !defined (__ARM_EABI__) */ +# define CTORS_SECTION_ASM_OP ARM_CTORS_SECTION_OP +# define DTORS_SECTION_ASM_OP ARM_DTORS_SECTION_OP +# endif /* !defined (__ARM_EABI__) */ +#endif /* !defined (IN_LIBCC2) */ + +/* True if the operating system can merge entities with vague linkage + (e.g., symbols in COMDAT group) during dynamic linking. */ +#ifndef TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P true +#endif + +#define ARM_OUTPUT_FN_UNWIND(F, PROLOGUE) arm_output_fn_unwind (F, PROLOGUE) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. 
+ Thumb-2 has the same restrictions as arm. */ +#ifndef REG_OK_STRICT + +#define ARM_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= LAST_ARM_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ((REGNO (X) <= LAST_ARM_REGNUM \ + && REGNO (X) != STACK_POINTER_REGNUM) \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (REGNO (X) <= LAST_LO_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && (REGNO (X) == STACK_POINTER_REGNUM \ + || (X) == hard_frame_pointer_rtx \ + || (X) == arg_pointer_rtx))) + +#define REG_STRICT_P 0 + +#else /* REG_OK_STRICT */ + +#define ARM_REG_OK_FOR_BASE_P(X) \ + ARM_REGNO_OK_FOR_BASE_P (REGNO (X)) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ARM_REGNO_OK_FOR_INDEX_P (REGNO (X)) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO (X), MODE) + +#define REG_STRICT_P 1 + +#endif /* REG_OK_STRICT */ + +/* Now define some helpers in terms of the above. */ + +#define REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_MODE_OK_FOR_BASE_P (X, MODE) \ + : ARM_REG_OK_FOR_BASE_P (X)) + +/* For 16-bit Thumb, a valid index register is anything that can be used in + a byte load instruction. */ +#define THUMB1_REG_OK_FOR_INDEX_P(X) \ + THUMB1_REG_MODE_OK_FOR_BASE_P (X, QImode) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. On the Thumb, the stack pointer + is not suitable. */ +#define REG_OK_FOR_INDEX_P(X) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_OK_FOR_INDEX_P (X) \ + : ARM_REG_OK_FOR_INDEX_P (X)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REG_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REG_OK_FOR_INDEX_P (X) + +#define ARM_BASE_REGISTER_RTX_P(X) \ + (REG_P (X) && ARM_REG_OK_FOR_BASE_P (X)) + +#define ARM_INDEX_REGISTER_RTX_P(X) \ + (REG_P (X) && ARM_REG_OK_FOR_INDEX_P (X)) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE Pmode + +#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ + || (TARGET_THUMB1 \ + && (optimize_size || flag_pic))) + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + (TARGET_THUMB1 \ + ? (min >= 0 && max < 512 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ + : min >= -256 && max < 256 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \ + : min >= 0 && max < 8192 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ + : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? HImode \ + : QImode)) + +/* signed 'char' is most compatible, but RISC OS wants it unsigned. + unsigned is probably best, but may break some code. */ +#ifndef DEFAULT_SIGNED_CHAR +#define DEFAULT_SIGNED_CHAR 0 +#endif + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +#undef MOVE_RATIO +#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. 
*/ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) \ + (TARGET_THUMB ? ZERO_EXTEND : \ + ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND \ + : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN))) + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 0 + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Immediate shift counts are truncated by the output routines (or was it + the assembler?). Shift counts in a register are truncated by ARM. Note + that the native compiler puts too large (> 32) immediate shift counts + into a register and shifts by the register, letting the ARM decide what + to do instead of doing that itself. */ +/* This is all wrong. Defining SHIFT_COUNT_TRUNCATED tells combine that + code like (X << (Y % 32)) for register X, Y is equivalent to (X << Y). + On the arm, Y in a register is used modulo 256 for the shift. Only for + rotates is modulo 32 used. */ +/* #define SHIFT_COUNT_TRUNCATED 1 */ + +/* All integers have the same format so truncation is easy. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Calling from registers is a massive pain. */ +#define NO_FUNCTION_CSE 1 + +/* The machine modes of pointers and functions */ +#define Pmode SImode +#define FUNCTION_MODE Pmode + +#define ARM_FRAME_RTX(X) \ + ( (X) == frame_pointer_rtx || (X) == stack_pointer_rtx \ + || (X) == arg_pointer_rtx) + +/* Try to generate sequences that don't involve branches, we can then use + conditional instructions. */ +#define BRANCH_COST(speed_p, predictable_p) \ + (current_tune->branch_cost (speed_p, predictable_p)) + +/* False if short circuit operation is preferred. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ + ((optimize_size) \ + ? (TARGET_THUMB ? false : true) \ + : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) + + +/* Position Independent Code. */ +/* We decide which register to use based on the compilation options and + the assembler in use; this is more general than the APCS restriction of + using sb (r9) all the time. */ +extern unsigned arm_pic_register; + +/* The register number of the register used to address a table of static + data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM arm_pic_register + +/* We can't directly access anything that contains a symbol, + nor can we indirect via the constant pool. One exception is + UNSPEC_TLS, which is always PIC. */ +#define LEGITIMATE_PIC_OPERAND_P(X) \ + (!(symbol_mentioned_p (X) \ + || label_mentioned_p (X) \ + || (GET_CODE (X) == SYMBOL_REF \ + && CONSTANT_POOL_ADDRESS_P (X) \ + && (symbol_mentioned_p (get_pool_constant (X)) \ + || label_mentioned_p (get_pool_constant (X))))) \ + || tls_mentioned_p (X)) + +/* We need to know when we are making a constant pool; this determines + whether data needs to be in the GOT or can be referenced via a GOT + offset. */ +extern int making_const_table; + +/* Handle pragmas for compatibility with Intel's compilers. */ +/* Also abuse this to register additional C specific EABI attributes. 
*/ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "long_calls", arm_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \ + arm_lang_object_attributes_init(); \ +} while (0) + +/* Condition code information. */ +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y) + +#define REVERSIBLE_CC_MODE(MODE) 1 + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ + ? reverse_condition_maybe_unordered (code) \ + : reverse_condition (code)) + +/* The arm5 clz instruction returns 32. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define CC_STATUS_INIT \ + do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) + +#undef ASM_APP_OFF +#define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \ + TARGET_THUMB2 ? "\t.thumb\n" : "") + +/* Output a push or a pop instruction (only used when profiling). + We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1. We know + that ASM_OUTPUT_REG_PUSH will be matched with ASM_OUTPUT_REG_POP, and + that r7 isn't used by the function profiler, so we can use it as a + scratch reg. WARNING: This isn't safe in the general case! It may be + sensitive to future changes in final.c:profile_function. */ +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \ + } while (0) + + +/* See comment for ASM_OUTPUT_REG_PUSH concerning Thumb-1 issue. */ +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \ + } while (0) + +#define ADDR_VEC_ALIGN(JUMPTABLE) \ + ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0) + +/* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the + default alignment from elfos.h. */ +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) /* Empty. */ + +/* Make sure subsequent insns are aligned after a TBB. 
*/ +#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \ + do \ + { \ + if (GET_MODE (PATTERN (JUMPTABLE)) == QImode) \ + ASM_OUTPUT_ALIGN (FILE, 1); \ + } \ + while (0) + +#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + if (TARGET_THUMB) \ + { \ + if (is_called_in_ARM_mode (DECL) \ + || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ + && cfun->is_thunk)) \ + fprintf (STREAM, "\t.code 32\n") ; \ + else if (TARGET_THUMB1) \ + fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ + else \ + fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \ + } \ + if (TARGET_POKE_FUNCTION_NAME) \ + arm_poke_function_name (STREAM, (const char *) NAME); \ + } \ + while (0) + +/* For aliases of functions we use .thumb_set instead. */ +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \ + do \ + { \ + const char *const LABEL1 = XSTR (XEXP (DECL_RTL (decl), 0), 0); \ + const char *const LABEL2 = IDENTIFIER_POINTER (DECL2); \ + \ + if (TARGET_THUMB && TREE_CODE (DECL1) == FUNCTION_DECL) \ + { \ + fprintf (FILE, "\t.thumb_set "); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, ","); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } \ + else \ + ASM_OUTPUT_DEF (FILE, LABEL1, LABEL2); \ + } \ + while (0) + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* To support -falign-* switches we need to use .p2align so + that alignment directives in code sections will be padded + with no-op instructions, rather than zeroes. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } +#endif + +/* Add two bytes to the length of conditionally executed Thumb-2 + instructions for the IT instruction. */ +#define ADJUST_INSN_LENGTH(insn, length) \ + if (TARGET_THUMB2 && GET_CODE (PATTERN (insn)) == COND_EXEC) \ + length += 2; + +/* Only perform branch elimination (by making instructions conditional) if + we're optimizing. For Thumb-2 check if any IT instructions need + outputting. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + if (TARGET_ARM && optimize) \ + arm_final_prescan_insn (INSN); \ + else if (TARGET_THUMB2) \ + thumb2_final_prescan_insn (INSN); \ + else if (TARGET_THUMB1) \ + thumb1_final_prescan_insn (INSN) + +#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \ + (HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \ + : ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\ + ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0x80000000) \ + ? ((~ (unsigned HOST_WIDE_INT) 0) \ + & ~ (unsigned HOST_WIDE_INT) 0xffffffff) \ + : 0)))) + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + arm_return_addr (COUNT, FRAME) + +/* Mask of the bits in the PC that contain the real return address + when running in 26-bit mode. */ +#define RETURN_ADDR_MASK26 (0x03fffffc) + +/* Pick up the return address upon entry to a procedure. Used for + dwarf2 unwind information. This also enables the table driven + mechanism. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM) + +/* Used to mask out junk bits from the return address, such as + processor state, interrupt status, condition codes and the like. 
*/ +#define MASK_RETURN_ADDR \ + /* If we are generating code for an ARM2/ARM3 machine or for an ARM6 \ + in 26 bit mode, the condition codes must be masked out of the \ + return address. This does not apply to ARM6 and later processors \ + when running in 32 bit mode. */ \ + ((arm_arch4 || TARGET_THUMB) \ + ? (gen_int_mode ((unsigned long)0xffffffff, Pmode)) \ + : arm_gen_return_addr_mask ()) + + +/* Do not emit .note.GNU-stack by default. */ +#ifndef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 0 +#endif + +#define TARGET_ARM_ARCH \ + (arm_base_arch) \ + +#define TARGET_ARM_V6M (!arm_arch_notm && !arm_arch_thumb2) +#define TARGET_ARM_V7M (!arm_arch_notm && arm_arch_thumb2) + +/* The highest Thumb instruction set version supported by the chip. */ +#define TARGET_ARM_ARCH_ISA_THUMB \ + (arm_arch_thumb2 ? 2 \ + : ((TARGET_ARM_ARCH >= 5 || arm_arch4t) ? 1 : 0)) + +/* Expands to an upper-case char of the target's architectural + profile. */ +#define TARGET_ARM_ARCH_PROFILE \ + (!arm_arch_notm \ + ? 'M' \ + : (arm_arch7 \ + ? (strlen (arm_arch_name) >=3 \ + ? (arm_arch_name[strlen (arm_arch_name) - 3]) \ + : 0) \ + : 0)) + +/* Bit-field indicating what size LDREX/STREX loads/stores are available. + Bit 0 for bytes, up to bit 3 for double-words. */ +#define TARGET_ARM_FEATURE_LDREX \ + ((TARGET_HAVE_LDREX ? 4 : 0) \ + | (TARGET_HAVE_LDREXBH ? 3 : 0) \ + | (TARGET_HAVE_LDREXD ? 8 : 0)) + +/* Set as a bit mask indicating the available widths of hardware floating + point types. Where bit 1 indicates 16-bit support, bit 2 indicates + 32-bit support, bit 3 indicates 64-bit support. */ +#define TARGET_ARM_FP \ + (TARGET_VFP_SINGLE ? 4 \ + : (TARGET_VFP_DOUBLE ? (TARGET_FP16 ? 14 : 12) : 0)) + + +/* Set as a bit mask indicating the available widths of floating point + types for hardware NEON floating point. This is the same as + TARGET_ARM_FP without the 64-bit bit set. */ +#ifdef TARGET_NEON +#define TARGET_NEON_FP \ + (TARGET_ARM_FP & (0xff ^ 0x08)) +#endif + +/* The maximum number of parallel loads or stores we support in an ldm/stm + instruction. */ +#define MAX_LDM_STM_OPS 4 + +#define BIG_LITTLE_SPEC \ + " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}" + +extern const char *arm_rewrite_mcpu (int argc, const char **argv); +#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", arm_rewrite_mcpu }, + +#define ASM_CPU_SPEC \ + " %{mcpu=generic-*:-march=%*;" \ + " :%{march=*:-march=%*}}" \ + BIG_LITTLE_SPEC + +/* -mcpu=native handling only makes sense with compiler running on + an ARM chip. */ +#if defined(__arm__) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, \ + BIG_LITTLE_CPU_SPEC_FUNCTIONS + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{march=native:%. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Beware of splitting Thumb1 patterns that output multiple +;; assembly instructions, in particular instruction such as SBC and +;; ADC which consume flags. For example, in the pattern thumb_subdi3 +;; below, the output SUB implicitly sets the flags (assembled to SUBS) +;; and then the Carry flag is used by SBC to compute the correct +;; result. If we split thumb_subdi3 pattern into two separate RTL +;; insns (using define_insn_and_split), the scheduler might place +;; other RTL insns between SUB and SBC, possibly modifying the Carry +;; flag used by SBC. 
This might happen because most Thumb1 patterns +;; for flag-setting instructions do not have explicit RTL for setting +;; or clobbering the flags. Instead, they have the attribute "conds" +;; with value "set" or "clob". However, this attribute is not used to +;; identify dependencies and therefore the scheduler might reorder +;; these instructions. Currently, this problem cannot happen because +;; there are no separate Thumb1 patterns for individual instructions +;; that consume flags (except conditional execution, which is treated +;; differently). In particular, there is no Thumb1 armv6-m pattern for +;; sbc or adc. + + +;;--------------------------------------------------------------------------- +;; Constants + +;; Register numbers -- All machine registers should be defined here +(define_constants + [(R0_REGNUM 0) ; First CORE register + (R1_REGNUM 1) ; Second CORE register + (IP_REGNUM 12) ; Scratch register + (SP_REGNUM 13) ; Stack pointer + (LR_REGNUM 14) ; Return address register + (PC_REGNUM 15) ; Program counter + (LAST_ARM_REGNUM 15) ; + (CC_REGNUM 100) ; Condition code pseudo register + (VFPCC_REGNUM 101) ; VFP Condition code pseudo register + ] +) +;; 3rd operand to select_dominance_cc_mode +(define_constants + [(DOM_CC_X_AND_Y 0) + (DOM_CC_NX_OR_Y 1) + (DOM_CC_X_OR_Y 2) + ] +) +;; conditional compare combination +(define_constants + [(CMP_CMP 0) + (CMN_CMP 1) + (CMP_CMN 2) + (CMN_CMN 3) + (NUM_OF_COND_CMP 4) + ] +) + + +;;--------------------------------------------------------------------------- +;; Attributes + +;; Processor type. This is created automatically from arm-cores.def. +(include "arm-tune.md") + +;; Instruction classification types +(include "types.md") + +; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when +; generating ARM code. This is used to control the length of some insn +; patterns that share the same RTL in both ARM and Thumb code. +(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) + +; IS_ARCH6 is set to 'yes' when we are generating code for ARMv6. +(define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6"))) + +; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. +(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) + +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + +;; Operand number of an input operand that is shifted. Zero if the +;; given instruction does not shift one of its input operands. +(define_attr "shift" "" (const_int 0)) + +; Floating Point Unit. If we only have floating point emulation, then there +; is no point in scheduling the floating point insns. (Well, for best +; performance we should try and group them together). +(define_attr "fpu" "none,vfp" + (const (symbol_ref "arm_fpu_attr"))) + +(define_attr "predicated" "yes,no" (const_string "no")) + +; LENGTH of an instruction (in bytes) +(define_attr "length" "" + (const_int 4)) + +; The architecture which supports the instruction (or alternative). +; This can be "a" for ARM, "t" for either of the Thumbs, "32" for +; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. 
"v6" +; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without +; arm_arch6. This attribute is used to compute attribute "enabled", +; use type "any" to enable an alternative in all cases. +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" + (const_string "any")) + +(define_attr "arch_enabled" "no,yes" + (cond [(eq_attr "arch" "any") + (const_string "yes") + + (and (eq_attr "arch" "a") + (match_test "TARGET_ARM")) + (const_string "yes") + + (and (eq_attr "arch" "t") + (match_test "TARGET_THUMB")) + (const_string "yes") + + (and (eq_attr "arch" "t1") + (match_test "TARGET_THUMB1")) + (const_string "yes") + + (and (eq_attr "arch" "t2") + (match_test "TARGET_THUMB2")) + (const_string "yes") + + (and (eq_attr "arch" "32") + (match_test "TARGET_32BIT")) + (const_string "yes") + + (and (eq_attr "arch" "v6") + (match_test "TARGET_32BIT && arm_arch6")) + (const_string "yes") + + (and (eq_attr "arch" "nov6") + (match_test "TARGET_32BIT && !arm_arch6")) + (const_string "yes") + + (and (eq_attr "arch" "avoid_neon_for_64bits") + (match_test "TARGET_NEON") + (not (match_test "TARGET_PREFER_NEON_64BITS"))) + (const_string "yes") + + (and (eq_attr "arch" "neon_for_64bits") + (match_test "TARGET_NEON") + (match_test "TARGET_PREFER_NEON_64BITS")) + (const_string "yes") + + (and (eq_attr "arch" "iwmmxt2") + (match_test "TARGET_REALLY_IWMMXT2")) + (const_string "yes")] + + (const_string "no"))) + +(define_attr "opt" "any,speed,size" + (const_string "any")) + +(define_attr "opt_enabled" "no,yes" + (cond [(eq_attr "opt" "any") + (const_string "yes") + + (and (eq_attr "opt" "speed") + (match_test "optimize_function_for_speed_p (cfun)")) + (const_string "yes") + + (and (eq_attr "opt" "size") + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "yes")] + (const_string "no"))) + +(define_attr "use_literal_pool" "no,yes" + (cond [(and (eq_attr "type" "f_loads,f_loadd") + (match_test "CONSTANT_P (operands[1])")) + (const_string "yes")] + (const_string "no"))) + +; Allows an insn to disable certain alternatives for reasons other than +; arch support. +(define_attr "insn_enabled" "no,yes" + (const_string "yes")) + +; Enable all alternatives that are both arch_enabled and insn_enabled. + (define_attr "enabled" "no,yes" + (cond [(eq_attr "insn_enabled" "no") + (const_string "no") + + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + + (and (eq_attr "use_literal_pool" "yes") + (match_test "arm_disable_literal_pool")) + (const_string "no") + + (eq_attr "arch_enabled" "no") + (const_string "no") + + (eq_attr "opt_enabled" "no") + (const_string "no")] + (const_string "yes"))) + +; POOL_RANGE is how far away from a constant pool entry that this insn +; can be placed. If the distance is zero, then this insn will never +; reference the pool. +; Note that for Thumb constant pools the PC value is rounded down to the +; nearest multiple of four. Therefore, THUMB2_POOL_RANGE (and POOL_RANGE for +; Thumb insns) should be set to - 2. +; NEG_POOL_RANGE is nonzero for insns that can reference a constant pool entry +; before its address. It is set to - (8 + ). 
+(define_attr "arm_pool_range" "" (const_int 0)) +(define_attr "thumb2_pool_range" "" (const_int 0)) +(define_attr "arm_neg_pool_range" "" (const_int 0)) +(define_attr "thumb2_neg_pool_range" "" (const_int 0)) + +(define_attr "pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_pool_range")] + (attr "arm_pool_range"))) +(define_attr "neg_pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_neg_pool_range")] + (attr "arm_neg_pool_range"))) + +; An assembler sequence may clobber the condition codes without us knowing. +; If such an insn references the pool, then we have no way of knowing how, +; so use the most conservative value for pool_range. +(define_asm_attributes + [(set_attr "conds" "clob") + (set_attr "length" "4") + (set_attr "pool_range" "250")]) + +; Load scheduling, set from the arm_ld_sched variable +; initialized by arm_option_override() +(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) + +; YES if the "type" attribute assigned to the insn denotes an +; Advanced SIMD instruction, NO otherwise. +(define_attr "is_neon_type" "yes,no" + (if_then_else (eq_attr "type" + "neon_add, neon_add_q, neon_add_widen, neon_add_long,\ + neon_qadd, neon_qadd_q, neon_add_halve, neon_add_halve_q,\ + neon_add_halve_narrow_q,\ + neon_sub, neon_sub_q, neon_sub_widen, neon_sub_long, neon_qsub,\ + neon_qsub_q, neon_sub_halve, neon_sub_halve_q,\ + neon_sub_halve_narrow_q,\ + neon_abs, neon_abs_q, neon_neg, neon_neg_q, neon_qneg,\ + neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\ + neon_abd_long, neon_minmax, neon_minmax_q, neon_compare,\ + neon_compare_q, neon_compare_zero, neon_compare_zero_q,\ + neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\ + neon_reduc_add_q, neon_reduc_add_long, neon_reduc_add_acc,\ + neon_reduc_add_acc_q, neon_reduc_minmax, neon_reduc_minmax_q,\ + neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ + neon_shift_imm, neon_shift_imm_q, neon_shift_imm_narrow_q,\ + neon_shift_imm_long, neon_shift_reg, neon_shift_reg_q,\ + neon_shift_acc, neon_shift_acc_q, neon_sat_shift_imm,\ + neon_sat_shift_imm_q, neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg, neon_sat_shift_reg_q,\ + neon_ins, neon_ins_q, neon_move, neon_move_q, neon_move_narrow_q,\ + neon_permute, neon_permute_q, neon_zip, neon_zip_q, neon_tbl1,\ + neon_tbl1_q, neon_tbl2, neon_tbl2_q, neon_tbl3, neon_tbl3_q,\ + neon_tbl4, neon_tbl4_q, neon_bsl, neon_bsl_q, neon_cls,\ + neon_cls_q, neon_cnt, neon_cnt_q, neon_dup, neon_dup_q,\ + neon_ext, neon_ext_q, neon_rbit, neon_rbit_q,\ + neon_rev, neon_rev_q, neon_mul_b, neon_mul_b_q, neon_mul_h,\ + neon_mul_h_q, neon_mul_s, neon_mul_s_q, neon_mul_b_long,\ + neon_mul_h_long, neon_mul_s_long, neon_mul_d_long, neon_mul_h_scalar,\ + neon_mul_h_scalar_q, neon_mul_s_scalar, neon_mul_s_scalar_q,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long, neon_sat_mul_b,\ + neon_sat_mul_b_q, neon_sat_mul_h, neon_sat_mul_h_q,\ + neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_b_long,\ + neon_sat_mul_h_long, neon_sat_mul_s_long, neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q, neon_sat_mul_s_scalar,\ + neon_sat_mul_s_scalar_q, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long, neon_mla_b, neon_mla_b_q, neon_mla_h,\ + neon_mla_h_q, neon_mla_s, neon_mla_s_q, neon_mla_b_long,\ + neon_mla_h_long, neon_mla_s_long, neon_mla_h_scalar,\ + neon_mla_h_scalar_q, neon_mla_s_scalar, neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + 
neon_sat_mla_s_scalar_long,\ + neon_to_gp, neon_to_gp_q, neon_from_gp, neon_from_gp_q,\ + neon_ldr, neon_load1_1reg, neon_load1_1reg_q, neon_load1_2reg,\ + neon_load1_2reg_q, neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q, neon_load1_all_lanes,\ + neon_load1_all_lanes_q, neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_2reg, neon_load2_2reg_q, neon_load2_4reg,\ + neon_load2_4reg_q, neon_load2_all_lanes, neon_load2_all_lanes_q,\ + neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_3reg, neon_load3_3reg_q, neon_load3_all_lanes,\ + neon_load3_all_lanes_q, neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q, neon_load4_all_lanes,\ + neon_load4_all_lanes_q, neon_load4_one_lane, neon_load4_one_lane_q,\ + neon_str, neon_store1_1reg, neon_store1_1reg_q, neon_store1_2reg,\ + neon_store1_2reg_q, neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q, neon_store1_one_lane,\ + neon_store1_one_lane_q, neon_store2_2reg, neon_store2_2reg_q,\ + neon_store2_4reg, neon_store2_4reg_q, neon_store2_one_lane,\ + neon_store2_one_lane_q, neon_store3_3reg, neon_store3_3reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q, neon_store4_4reg,\ + neon_store4_4reg_q, neon_store4_one_lane, neon_store4_one_lane_q,\ + neon_fp_abd_s, neon_fp_abd_s_q, neon_fp_abd_d, neon_fp_abd_d_q,\ + neon_fp_addsub_s, neon_fp_addsub_s_q, neon_fp_addsub_d,\ + neon_fp_addsub_d_q, neon_fp_compare_s, neon_fp_compare_s_q,\ + neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\ + neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\ + neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, + neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_d_q,\ + neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ + neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ + neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ + neon_fp_round_s, neon_fp_round_s_q, neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d, neon_fp_recpe_d_q, neon_fp_recps_s,\ + neon_fp_recps_s_q, neon_fp_recps_d, neon_fp_recps_d_q,\ + neon_fp_recpx_s, neon_fp_recpx_s_q, neon_fp_recpx_d,\ + neon_fp_recpx_d_q, neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q, neon_fp_rsqrts_s,\ + neon_fp_rsqrts_s_q, neon_fp_rsqrts_d, neon_fp_rsqrts_d_q,\ + neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q, neon_fp_mul_d, neon_fp_mul_d_q,\ + neon_fp_mul_d_scalar_q, neon_fp_mla_s, neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q, neon_fp_mla_d,\ + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q, neon_fp_sqrt_d, neon_fp_sqrt_d_q,\ + neon_fp_div_s, neon_fp_div_s_q, neon_fp_div_d, neon_fp_div_d_q, crypto_aes,\ + crypto_sha1_xor, crypto_sha1_fast, crypto_sha1_slow, crypto_sha256_fast,\ + crypto_sha256_slow") + (const_string "yes") + (const_string "no"))) + +; condition codes: this one is used by final_prescan_insn to speed up +; conditionalizing instructions. It saves having to scan the rtl to see if +; it uses or alters the condition codes. +; +; USE means that the condition codes are used by the insn in the process of +; outputting code, this means (at present) that we can't use the insn in +; inlined branches +; +; SET means that the purpose of the insn is to set the condition codes in a +; well defined manner. 
+; +; CLOB means that the condition codes are altered in an undefined manner, if +; they are altered at all +; +; UNCONDITIONAL means the instruction can not be conditionally executed and +; that the instruction does not use or alter the condition codes. +; +; NOCOND means that the instruction does not use or alter the condition +; codes but can be converted into a conditionally executed instruction. + +(define_attr "conds" "use,set,clob,unconditional,nocond" + (if_then_else + (ior (eq_attr "is_thumb1" "yes") + (eq_attr "type" "call")) + (const_string "clob") + (if_then_else (eq_attr "is_neon_type" "no") + (const_string "nocond") + (const_string "unconditional")))) + +; Predicable means that the insn can be conditionally executed based on +; an automatically added predicate (additional patterns are generated by +; gen...). We default to 'no' because no Thumb patterns match this rule +; and not all ARM patterns do. +(define_attr "predicable" "no,yes" (const_string "no")) + +; Only model the write buffer for ARM6 and ARM7. Earlier processors don't +; have one. Later ones, such as StrongARM, have write-back caches, so don't +; suffer blockages enough to warrant modelling this (and it can adversely +; affect the schedule). +(define_attr "model_wbuf" "no,yes" (const (symbol_ref "arm_tune_wbuf"))) + +; WRITE_CONFLICT implies that a read following an unrelated write is likely +; to stall the processor. Used with model_wbuf above. +(define_attr "write_conflict" "no,yes" + (if_then_else (eq_attr "type" + "block,call,load1") + (const_string "yes") + (const_string "no"))) + +; Classify the insns into those that take one cycle and those that take more +; than one on the main cpu execution unit. +(define_attr "core_cycles" "single,multi" + (if_then_else (eq_attr "type" + "adc_imm, adc_reg, adcs_imm, adcs_reg, adr, alu_ext, alu_imm, alu_reg,\ + alu_shift_imm, alu_shift_reg, alus_ext, alus_imm, alus_reg,\ + alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\ + logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\ + logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\ + wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\ + wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\ + wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\ + wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\ + wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\ + wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\ + wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\ + wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\ + wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\ + wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\ + wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge") + (const_string "single") + (const_string "multi"))) + +;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a +;; distant label. Only applicable to Thumb code. +(define_attr "far_jump" "yes,no" (const_string "no")) + + +;; The number of machine instructions this pattern expands to. +;; Used for Thumb-2 conditional execution.
+(define_attr "ce_count" "" (const_int 1)) + +;;--------------------------------------------------------------------------- +;; Unspecs + +(include "unspecs.md") + +;;--------------------------------------------------------------------------- +;; Mode iterators + +(include "iterators.md") + +;;--------------------------------------------------------------------------- +;; Predicates + +(include "predicates.md") +(include "constraints.md") + +;;--------------------------------------------------------------------------- +;; Pipeline descriptions + +(define_attr "tune_cortexr4" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexr4,cortexr4f,cortexr5") + (const_string "yes") + (const_string "no")))) + +;; True if the generic scheduling description should be used. + +(define_attr "generic_sched" "yes,no" + (const (if_then_else + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa12,cortexa15,cortexa53,cortexm4,marvell_pj4") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) + +(define_attr "generic_vfp" "yes,no" + (const (if_then_else + (and (eq_attr "fpu" "vfp") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4") + (eq_attr "tune_cortexr4" "no")) + (const_string "yes") + (const_string "no")))) + +(include "marvell-f-iwmmxt.md") +(include "arm-generic.md") +(include "arm926ejs.md") +(include "arm1020e.md") +(include "arm1026ejs.md") +(include "arm1136jfs.md") +(include "fa526.md") +(include "fa606te.md") +(include "fa626te.md") +(include "fmp626.md") +(include "fa726te.md") +(include "cortex-a5.md") +(include "cortex-a7.md") +(include "cortex-a8.md") +(include "cortex-a9.md") +(include "cortex-a15.md") +(include "cortex-a53.md") +(include "cortex-r4.md") +(include "cortex-r4f.md") +(include "cortex-m4.md") +(include "cortex-m4-fpu.md") +(include "vfp11.md") +(include "marvell-pj4.md") + + +;;--------------------------------------------------------------------------- +;; Insn patterns +;; +;; Addition insns. + +;; Note: For DImode insns, there is normally no reason why operands should +;; not be in the same register, what we don't want is for something being +;; written to partially overlap something that is an input. + +(define_expand "adddi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "arm_adddi_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (!REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn "*thumb1_adddi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM)) + ] + "TARGET_THUMB1" + "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_adddi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r") + (match_operand:DI 2 "arm_adddi_operand" "r, 0, r, Dd, Dd"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !TARGET_NEON" + "#" + "TARGET_32BIT && reload_completed + && ! 
(TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart_mode (SImode, DImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*adddi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*adddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (const_int 0)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT && CONST_INT_P (operands[2])) + { + arm_split_constant (PLUS, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + " +) + +; If there is a scratch available, this will be faster than synthesizing the +; addition. 
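+; (The peephole below applies when neither the constant nor its negation is a +; valid ADD/SUB immediate but its bitwise complement is, so the constant can +; be loaded into the scratch register with a single MVN and then added with +; an ordinary register-register ADD.)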
+(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_32BIT && + !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (-INTVAL (operands[2]))) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))] + "" +) + +;; The r/r/k alternative is required when reloading the address +;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will +;; put the duplicated register first, and not try the commutative version. +(define_insn_and_split "*arm_addsi3" + [(set (match_operand:SI 0 "s_register_operand" "=rk,l,l ,l ,r ,k ,r,r ,k ,r ,k,k,r ,k ,r") + (plus:SI (match_operand:SI 1 "s_register_operand" "%0 ,l,0 ,l ,rk,k ,r,rk,k ,rk,k,r,rk,k ,rk") + (match_operand:SI 2 "reg_or_int_operand" "rk ,l,Py,Pd,rI,rI,k,Pj,Pj,L ,L,L,PJ,PJ,?n")))] + "TARGET_32BIT" + "@ + add%?\\t%0, %0, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %2, %1 + addw%?\\t%0, %1, %2 + addw%?\\t%0, %1, %2 + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + subw%?\\t%0, %1, #%n2 + subw%?\\t%0, %1, #%n2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !const_ok_for_op (INTVAL (operands[2]), PLUS) + && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " + arm_split_constant (PLUS, SImode, curr_insn, + INTVAL (operands[2]), operands[0], + operands[1], 0); + DONE; + " + [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,t2,t2,*,*,a,t2,t2,*") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_imm") + (const_string "alu_reg"))) + ] +) + +(define_insn_and_split "*thumb1_addsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,k,l,l,l") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,k,k,0,l,k") + (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,M,O,Pa,Pb,Pc")))] + "TARGET_THUMB1" + "* + static const char * const asms[] = + { + \"add\\t%0, %0, %2\", + \"sub\\t%0, %0, #%n2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %1, %2\", + \"#\", + \"#\", + \"#\" + }; + if ((which_alternative == 2 || which_alternative == 6) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) + return \"sub\\t%0, %1, #%n2\"; + return asms[which_alternative]; + " + "&& reload_completed && CONST_INT_P (operands[2]) + && ((operands[1] != stack_pointer_rtx + && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)) + || (operands[1] == stack_pointer_rtx + && INTVAL (operands[2]) > 1020))" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))] + { + HOST_WIDE_INT offset = INTVAL (operands[2]); + if (operands[1] == stack_pointer_rtx) + offset -= 1020; + else + { + if (offset > 255) + offset = 255; + else if (offset < -255) + offset = -255; + } + operands[3] = GEN_INT (offset); + operands[2] = GEN_INT (INTVAL (operands[2]) - offset); + } + [(set_attr "length" "2,2,2,2,2,2,2,4,4,4") + (set_attr "type" "alus_imm,alus_imm,alus_reg,alus_reg,alus_reg, + 
alus_reg,alus_reg,multiple,multiple,multiple")] +) + +;; Reloading and elimination of the frame pointer can +;; sometimes cause this optimization to be missed. +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_dup 0) + (plus:SI (match_dup 0) (reg:SI SP_REGNUM)))] + "TARGET_THUMB1 + && (unsigned HOST_WIDE_INT) (INTVAL (operands[1])) < 1024 + && (INTVAL (operands[1]) & 3) == 0" + [(set (match_dup 0) (plus:SI (reg:SI SP_REGNUM) (match_dup 1)))] + "" +) + +(define_insn "addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_ARM" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2 + add%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "r, r, r") + (match_operand:SI 1 "arm_add_operand" "I,L, r")) + (const_int 0)))] + "TARGET_ARM" + "@ + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_negsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operand:SI 0 "s_register_operand" "l,r")) + (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "cmn%?\\t%1, %0" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*") + (set_attr "length" "2,4") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alus_reg")] +) + +;; This is the canonicalization of addsi3_compare0_for_combiner when the +;; addend is a constant. +(define_insn "cmpsi2_addneg" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_addimm_operand" "L,I"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) + (match_operand:SI 3 "arm_addimm_operand" "I,L")))] + "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])" + "@ + add%.\\t%0, %1, %3 + sub%.\\t%0, %1, #%n3" + [(set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) + +;; Convert the sequence +;; sub rd, rn, #1 +;; cmn rd, #1 (equivalent to cmp rd, #-1) +;; bne dest +;; into +;; subs rd, rn, #1 +;; bcs dest ((unsigned)rn >= 1) +;; similarly for the beq variant using bcc. +;; This is a common looping idiom (while (n--)) +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (const_int -1))) + (set (match_operand 2 "cc_register" "") + (compare (match_dup 0) (const_int -1))) + (set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_dup 2) (const_int 0)]) + (match_operand 4 "" "") + (match_operand 5 "" "")))] + "TARGET_32BIT && peep2_reg_dead_p (3, operands[2])" + [(parallel[ + (set (match_dup 2) + (compare:CC + (match_dup 1) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (const_int -1)))]) + (set (pc) + (if_then_else (match_op_dup 3 [(match_dup 2) (const_int 0)]) + (match_dup 4) + (match_dup 5)))] + "operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); + operands[3] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? 
GEU : LTU), + VOIDmode, + operands[2], const0_rtx);" +) + +;; The next four insns work because they compare the result with one of +;; the operands, and we know that the use of the condition code is +;; either GEU or LTU, so we can use the carry flag from the addition +;; instead of doing the compare a second time. +(define_insn "*addsi3_compare_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (match_dup 1))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2 + add%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*addsi3_compare_op2" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (match_dup 2))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_addsi2_op0" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 0)))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1 + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_addsi2_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 1)))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1 + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] + ) + +(define_insn "*addsi3_carryin_" + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r") + (match_operand:SI 2 "arm_not_operand" "0,rI,K")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "@ + adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 + sbc%?\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] +) + +(define_insn "*addsi3_carryin_alt2_" + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "s_register_operand" "%l,r,r")) + (match_operand:SI 2 "arm_rhs_operand" "l,rI,K")))] + "TARGET_32BIT" + "@ + adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 + sbc%?\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" 
"adc_reg,adc_reg,adc_imm")] +) + +(define_insn "*addsi3_carryin_shift_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "adc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*addsi3_carryin_clobercc_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "adc%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] +) + +(define_insn "*subsi3_carryin" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I") + (match_operand:SI 2 "s_register_operand" "r,r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "@ + sbc%?\\t%0, %1, %2 + rsc%?\\t%0, %2, %1" + [(set_attr "conds" "use") + (set_attr "arch" "*,a") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "adc_reg,adc_imm")] +) + +(define_insn "*subsi3_carryin_const" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "type" "adc_imm")] +) + +(define_insn "*subsi3_carryin_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] +) + +(define_insn "*subsi3_carryin_compare_const" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, #%B2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_imm")] +) + +(define_insn "*subsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")])) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*rsbsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operator:SI 2 
"shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_ARM" + "rsc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (const_int -1))) + (clobber (match_operand:SI 3 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (not:SI (ashift:SI (match_dup 3) (match_dup 2))))] + " + operands[1] = GEN_INT (~(INTVAL (operands[1]) - 1)); +") + +(define_expand "addsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (plus:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "adddf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (plus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +(define_expand "subdi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (minus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (!REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn_and_split "*arm_subdi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0") + (match_operand:DI 2 "s_register_operand" "r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !TARGET_NEON" + "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb_subdi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_di_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC 
CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = GEN_INT (~0); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_di_sesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) + (ashiftrt:SI (match_dup 2) + (const_int 31))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI + (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, 
operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_zesidi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (minus:DI (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[1])) + { + if (TARGET_32BIT) + { + arm_split_constant (MINUS, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], + operands[2], optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + operands[1] = force_reg (SImode, operands[1]); + } + " +) + +(define_insn "thumb1_subsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (minus:SI (match_operand:SI 1 "register_operand" "l") + (match_operand:SI 2 "reg_or_int_operand" "lPd")))] + "TARGET_THUMB1" + "sub\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) + +; ??? 
Check Thumb-2 split length +(define_insn_and_split "*arm_subsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") + (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] + "TARGET_32BIT" + "@ + sub%?\\t%0, %1, %2 + sub%?\\t%0, %2 + sub%?\\t%0, %1, %2 + rsb%?\\t%0, %2, %1 + rsb%?\\t%0, %2, %1 + sub%?\\t%0, %1, %2 + sub%?\\t%0, %1, %2 + sub%?\\t%0, %1, %2 + #" + "&& (CONST_INT_P (operands[1]) + && !const_ok_for_arm (INTVAL (operands[1])))" + [(clobber (const_int 0))] + " + arm_split_constant (MINUS, SImode, curr_insn, + INTVAL (operands[1]), operands[0], operands[2], 0); + DONE; + " + [(set_attr "length" "4,4,4,4,4,4,4,4,16") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,*,*") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no") + (set_attr "type" "alu_reg,alu_reg,alu_reg,alu_reg,alu_imm,alu_imm,alu_reg,alu_reg,multiple")] +) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (minus:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "arm_general_register_operand" "")))] + "TARGET_32BIT + && !const_ok_for_arm (INTVAL (operands[1])) + && const_ok_for_arm (~INTVAL (operands[1]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (minus:SI (match_dup 3) (match_dup 2)))] + "" +) + +(define_insn "*subsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I") + (match_operand:SI 2 "arm_rhs_operand" "I,r,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_reg,alus_reg")] +) + +(define_insn "subsi3_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I") + (match_operand:SI 2 "arm_rhs_operand" "I,r,r"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_reg,alus_reg")] +) + +(define_expand "subsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (minus:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "subdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (minus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + + +;; Multiplication insns + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "s_register_operand" "") + (mult:HI (match_operand:HI 1 "s_register_operand" "") + (match_operand:HI 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" + " + { + rtx result = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, result)); + DONE; + }" +) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (mult:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +;; Use `&' and then `0' to prevent the operands 0 and 1 
being the same +(define_insn "*arm_mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")))] + "TARGET_32BIT && !arm_arch6" + "mul%?\\t%0, %2, %1" + [(set_attr "type" "mul") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_mulsi3_v6" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r") + (match_operand:SI 2 "s_register_operand" "l,0,r")))] + "TARGET_32BIT && arm_arch6" + "mul%?\\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,yes,no")] +) + +; Unfortunately with the Thumb the '&'/'0' trick can fail when operands +; 1 and 2 are the same, because reload will make operand 0 match +; operand 1 without realizing that this conflicts with operand 2. We fix +; this by adding another alternative to match this case, and then `reload' +; it ourselves. This alternative must come first. +(define_insn "*thumb_mulsi3" + [(set (match_operand:SI 0 "register_operand" "=&l,&l,&l") + (mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0") + (match_operand:SI 2 "register_operand" "l,l,l")))] + "TARGET_THUMB1 && !arm_arch6" + "* + if (which_alternative < 2) + return \"mov\\t%0, %1\;mul\\t%0, %2\"; + else + return \"mul\\t%0, %2\"; + " + [(set_attr "length" "4,4,2") + (set_attr "type" "muls")] +) + +(define_insn "*thumb_mulsi3_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (mult:SI (match_operand:SI 1 "register_operand" "0,l,0") + (match_operand:SI 2 "register_operand" "l,0,0")))] + "TARGET_THUMB1 && arm_arch6" + "@ + mul\\t%0, %2 + mul\\t%0, %1 + mul\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi3_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +;; Unnamed templates to match MLA instruction.
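+;; For example, *mulsi3addsi below matches (plus:SI (mult:SI op2 op1) op3) +;; and emits a single MLA whose destination receives the product of +;; operands 2 and 1 plus the value of operand 3.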
+ +(define_insn "*mulsi3addsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] + "TARGET_32BIT && !arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsi3addsi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulsi3addsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3subsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI + (match_operand:SI 3 "s_register_operand" "r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch_thumb2" + "mls%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "maddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" + 
"") + +(define_insn "*mulsidi3adddi" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "smlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "smlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +;; 32x32->64 widening multiply. +;; As with mulsi3, the only difference between the v3-5 and v6+ +;; versions of these patterns is the requirement that the output not +;; overlap the inputs, but that still means we have to have a named +;; expander and two different starred insns. + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*mulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umaddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + 
"TARGET_32BIT && arm_arch3m" + "") + +(define_insn "*umulsidi3adddi" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "umlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "umlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*smulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "type" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*smulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "type" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "type" "umull") + (set_attr 
"predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + "TARGET_DSP_MULTIPLY" + "smulbb%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulhisi3tb" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + "TARGET_DSP_MULTIPLY" + "smultb%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulhisi3bt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smulbt%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulhisi3tt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smultt%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "maddhisi4" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlabb%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +;; Note: there is no maddhisi4ibt because this one is canonical form +(define_insn "*maddhisi4tb" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlatb%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*maddhisi4tt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlatt%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:DI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlalbb%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +;; Note: there 
is no maddhidi4ibt because this one is canonical form +(define_insn "*maddhidi4tb" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16))) + (sign_extend:DI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlaltb%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*maddhidi4tt" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16))) + (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16)))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlaltt%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mult:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "muldf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (mult:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +;; Division insns + +(define_expand "divsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (div:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (div:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +;; Boolean and,ior,xor insns + +;; Split up double word logical operations + +;; Split up simple DImode logical operations. Simply perform the logical +;; operation on the upper and lower halves of the registers. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! 
IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(sign_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 + [(ashiftrt:SI (match_dup 2) (const_int 31)) + (match_dup 4)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. 
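+;; As an illustration only (not part of the original sources): in C terms,
+;; "d = x ^ (uint64_t) (uint32_t) y" splits into
+;;   d_lo = x_lo ^ y;   /* the real XOR happens in the low word */
+;;   d_hi = x_hi;       /* the high word of the zero-extended y is 0, so copy */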
+(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (xor:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +(define_expand "anddi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (and:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_inv_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*anddi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: /* fall through */ + return "#"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (AND, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (AND, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "type" "neon_logic,neon_logic,multiple,multiple,\ + multiple,multiple,neon_logic,neon_logic") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, + avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "length" "*,*,8,8,8,8,*,*") + ] +) + +(define_insn_and_split "*anddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + ; The zero extend of operand 2 clears the high word of the output + ; operand. 
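+  ; Illustration only (not part of the original sources): in C terms,
+  ; "d = x & (uint64_t) (uint32_t) y" becomes
+  ;   d_lo = x_lo & y;  d_hi = 0;
+  ; which is what the split below emits.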
+ [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (const_int 0))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*anddi_sesdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) == 255 && arm_arch6) + { + operands[1] = convert_to_mode (QImode, operands[1], 1); + emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], + operands[1])); + } + else + arm_split_constant (AND, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); + + DONE; + } + } + else /* TARGET_THUMB1 */ + { + if (!CONST_INT_P (operands[2])) + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + else + { + int i; + + if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256) + { + operands[2] = force_reg (SImode, + GEN_INT (~INTVAL (operands[2]))); + + emit_insn (gen_thumb1_bicsi3 (operands[0], operands[2], operands[1])); + + DONE; + } + + for (i = 9; i <= 31; i++) + { + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2])) + { + emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i), + const0_rtx)); + DONE; + } + else if ((((HOST_WIDE_INT) 1) << i) - 1 + == ~INTVAL (operands[2])) + { + rtx shift = GEN_INT (i); + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg, operands[1], shift)); + emit_insn (gen_ashlsi3 (operands[0], reg, shift)); + + DONE; + } + } + + operands[2] = force_reg (SImode, operands[2]); + } + } + " +) + +; ??? 
Check split length for Thumb-2 +(define_insn_and_split "*arm_andsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] + "TARGET_32BIT" + "@ + and%?\\t%0, %1, %2 + and%?\\t%0, %1, %2 + bic%?\\t%0, %1, #%B2 + and%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (~INTVAL (operands[2])))" + [(clobber (const_int 0))] + " + arm_split_constant (AND, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; + " + [(set_attr "length" "4,4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" "logic_imm,logic_imm,logic_reg,logic_reg,logic_imm")] +) + +(define_insn "*thumb1_andsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "and\\t%0, %2" + [(set_attr "length" "2") + (set_attr "type" "logic_imm") + (set_attr "conds" "set")]) + +(define_insn "*andsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_not_operand" "I,K,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (and:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + and%.\\t%0, %1, %2 + bic%.\\t%0, %1, #%B2 + and%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_imm,logics_reg")] +) + +(define_insn "*andsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 0 "s_register_operand" "r,r,r") + (match_operand:SI 1 "arm_not_operand" "I,K,r")) + (const_int 0))) + (clobber (match_scratch:SI 2 "=X,r,X"))] + "TARGET_32BIT" + "@ + tst%?\\t%0, %1 + bic%.\\t%2, %0, #%B1 + tst%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_imm,logics_reg")] +) + +(define_insn "*zeroextractsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 0 "s_register_operand" "r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (const_int 0)))] + "TARGET_32BIT + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32 + && INTVAL (operands[1]) > 0 + && INTVAL (operands[1]) + (INTVAL (operands[2]) & 1) <= 8 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32)" + "* + operands[1] = GEN_INT (((1 << INTVAL (operands[1])) - 1) + << INTVAL (operands[2])); + output_asm_insn (\"tst%?\\t%0, %1\", operands); + return \"\"; + " + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logics_imm")] +) + +(define_insn_and_split "*ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + "#" + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && 
INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 12) + (const_int 8))) + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "TARGET_ARM" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0)) + (match_operand:SI 4 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + "#" + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 4)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0)) + (match_operand:SI 3 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + "#" + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set 
(match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 3)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_THUMB1" + [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:SI (match_dup 4) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +;; ??? Use Thumb-2 has bitfield insert/extract instructions. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1" + [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +;;; ??? This pattern is bogus. If operand3 has bits outside the range +;;; represented by the bitfield, then this will produce incorrect results. +;;; Somewhere, the value needs to be truncated. On targets like the m68k, +;;; which have a real bit-field insert instruction, the truncation happens +;;; in the bit-field insert instruction itself. Since arm does not have a +;;; bit-field insert instruction, we would have to emit code here to truncate +;;; the value before we insert. This loses some of the advantage of having +;;; this insv pattern, so this pattern needs to be reevalutated. 
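+;;; For illustration only (not part of the original sources), the truncation
+;;; discussed above would amount to masking the value before shifting it into
+;;; place, e.g. in C:
+;;;   mask = ((HOST_WIDE_INT) 1 << width) - 1;
+;;;   word = (word & ~(mask << start)) | ((value & mask) << start);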
+ +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" "") + (match_operand 2 "general_operand" "")) + (match_operand 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { + int start_bit = INTVAL (operands[2]); + int width = INTVAL (operands[1]); + HOST_WIDE_INT mask = (((HOST_WIDE_INT)1) << width) - 1; + rtx target, subtarget; + + if (arm_arch_thumb2) + { + if (unaligned_access && MEM_P (operands[0]) + && s_register_operand (operands[3], GET_MODE (operands[3])) + && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width + - start_bit; + + if (width == 32) + { + base_addr = adjust_address (operands[0], SImode, + start_bit / BITS_PER_UNIT); + emit_insn (gen_unaligned_storesi (base_addr, operands[3])); + } + else + { + rtx tmp = gen_reg_rtx (HImode); + + base_addr = adjust_address (operands[0], HImode, + start_bit / BITS_PER_UNIT); + emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); + emit_insn (gen_unaligned_storehi (base_addr, tmp)); + } + DONE; + } + else if (s_register_operand (operands[0], GET_MODE (operands[0]))) + { + bool use_bfi = TRUE; + + if (CONST_INT_P (operands[3])) + { + HOST_WIDE_INT val = INTVAL (operands[3]) & mask; + + if (val == 0) + { + emit_insn (gen_insv_zero (operands[0], operands[1], + operands[2])); + DONE; + } + + /* See if the set can be done with a single orr instruction. */ + if (val == mask && const_ok_for_arm (val << start_bit)) + use_bfi = FALSE; + } + + if (use_bfi) + { + if (!REG_P (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + + emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + } + else + FAIL; + } + + if (!s_register_operand (operands[0], GET_MODE (operands[0]))) + FAIL; + + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ + if (GET_CODE (target) == SUBREG) + { + subtarget = gen_reg_rtx (SImode); + if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (target))) + < GET_MODE_SIZE (SImode)) + target = SUBREG_REG (target); + } + else + subtarget = target; + + if (CONST_INT_P (operands[3])) + { + /* Since we are inserting a known constant, we may be able to + reduce the number of bits that we have to clear so that + the mask becomes simple. */ + /* ??? This code does not check to see if the new mask is actually + simpler. It may not be. */ + rtx op1 = gen_reg_rtx (SImode); + /* ??? Truncate operand3 to fit in the bitfield. See comment before + start of this pattern. */ + HOST_WIDE_INT op3_value = mask & INTVAL (operands[3]); + HOST_WIDE_INT mask2 = ((mask & ~op3_value) << start_bit); + + emit_insn (gen_andsi3 (op1, operands[0], + gen_int_mode (~mask2, SImode))); + emit_insn (gen_iorsi3 (subtarget, op1, + gen_int_mode (op3_value << start_bit, SImode))); + } + else if (start_bit == 0 + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* A Trick, since we are setting the bottom bits in the word, + we can shift operand[3] up, operand[0] down, OR them together + and rotate the result back again. This takes 3 insns, and + the third might be mergeable into another op. */ + /* The shift up copes with the possibility that operand[3] is + wider than the bitfield. 
*/ + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_lshrsi3 (op1, operands[0], operands[1])); + emit_insn (gen_iorsi3 (op1, op1, op0)); + emit_insn (gen_rotlsi3 (subtarget, op1, operands[1])); + } + else if ((width + start_bit == 32) + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* Similar trick, but slightly less efficient. */ + + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_ashlsi3 (op1, operands[0], operands[1])); + emit_insn (gen_lshrsi3 (op1, op1, operands[1])); + emit_insn (gen_iorsi3 (subtarget, op1, op0)); + } + else + { + rtx op0 = gen_int_mode (mask, SImode); + rtx op1 = gen_reg_rtx (SImode); + rtx op2 = gen_reg_rtx (SImode); + + if (!(const_ok_for_arm (mask) || const_ok_for_arm (~mask))) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + /* Mask out any bits in operand[3] that are not needed. */ + emit_insn (gen_andsi3 (op1, operands[3], op0)); + + if (CONST_INT_P (op0) + && (const_ok_for_arm (mask << start_bit) + || const_ok_for_arm (~(mask << start_bit)))) + { + op0 = gen_int_mode (~(mask << start_bit), SImode); + emit_insn (gen_andsi3 (op2, operands[0], op0)); + } + else + { + if (CONST_INT_P (op0)) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op0, op0, operands[2])); + + emit_insn (gen_andsi_notsi_si (op2, operands[0], op0)); + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op1, op1, operands[2])); + + emit_insn (gen_iorsi3 (subtarget, op1, op2)); + } + + if (subtarget != target) + { + /* If TARGET is still a SUBREG, then it must be wider than a word, + so we must be careful only to set the subword we were asked to. */ + if (GET_CODE (target) == SUBREG) + emit_move_insn (target, subtarget); + else + emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); + } + + DONE; + }" +) + +(define_insn "insv_zero" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (const_int 0))] + "arm_arch_thumb2" + "bfc%?\t%0, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +(define_insn "insv_t2" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (match_operand:SI 3 "s_register_operand" "r"))] + "arm_arch_thumb2" + "bfi%?\t%0, %3, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +; constants for op 2 will never be given to these patterns. +(define_insn_and_split "*anddi_notdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) + (match_operand:DI 2 "s_register_operand" "r,0")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! 
IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*anddi_notzesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + bic%?\\t%Q0, %Q1, %2 + #" + ; (not (zero_extend ...)) allows us to just copy the high word from + ; operand1 to operand0. + "TARGET_32BIT + && reload_completed + && operands[0] != operands[1]" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*anddi_notsesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (and:SI (not:SI + (ashiftrt:SI (match_dup 2) (const_int 31))) + (match_dup 4)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn "andsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "thumb1_bicsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (not:SI (match_operand:SI 1 "register_operand" "l")) + (match_operand:SI 2 "register_operand" "0")))] + "TARGET_THUMB1" + "bic\\t%0, %1" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rM")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_ARM" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "shift" "2") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "logic_shift_imm") + (const_string 
"logic_shift_reg")))] +) + +(define_insn "*andsi_notsi_si_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_dup 2)) (match_dup 1)))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] +) + +(define_insn "*andsi_notsi_si_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] +) + +(define_expand "iordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "type" "neon_logic,neon_logic,multiple,multiple,multiple,\ + multiple,neon_logic,neon_logic") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] +) + +(define_insn "*iordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + orr%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg,multiple")] +) + +(define_insn "*iordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2])) + { + if (TARGET_32BIT) + { + 
arm_split_constant (IOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + } + " +) + +(define_insn_and_split "*iorsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (ior:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] + "TARGET_32BIT" + "@ + orr%?\\t%0, %1, %2 + orr%?\\t%0, %1, %2 + orn%?\\t%0, %1, #%B2 + orr%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !(const_ok_for_arm (INTVAL (operands[2])) + || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))" + [(clobber (const_int 0))] +{ + arm_split_constant (IOR, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; +} + [(set_attr "length" "4,4,4,4,16") + (set_attr "arch" "32,t2,t2,32,32") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" "logic_imm,logic_reg,logic_imm,logic_reg,logic_reg")] +) + +(define_insn "*thumb1_iorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (ior:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "orr\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")]) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (ior:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_ARM + && !const_ok_for_arm (INTVAL (operands[2])) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (ior:SI (match_dup 1) (match_dup 3)))] + "" +) + +(define_insn "*iorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +(define_insn "*iorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +(define_expand "xordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "arm_xordi_operand" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && 
reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "type" "neon_logic,multiple,multiple,multiple,multiple,neon_logic") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] +) + +(define_insn "*xordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + eor%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "*xordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + "if (CONST_INT_P (operands[2])) + { + if (TARGET_32BIT) + { + arm_split_constant (XOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + }" +) + +(define_insn_and_split "*arm_xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r") + (xor:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,r,?n")))] + "TARGET_32BIT" + "@ + eor%?\\t%0, %1, %2 + eor%?\\t%0, %1, %2 + eor%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !const_ok_for_arm (INTVAL (operands[2]))" + [(clobber (const_int 0))] +{ + arm_split_constant (XOR, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; +} + [(set_attr "length" "4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no") + (set_attr "type" "logic_imm,logic_reg,logic_reg,multiple")] +) + +(define_insn "*thumb1_xorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "eor\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "*xorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (xor:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "eor%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" 
"logics_imm,logics_reg")] +) + +(define_insn "*xorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_rhs_operand" "I,r")) + (const_int 0)))] + "TARGET_32BIT" + "teq%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), +; (NOT D) we can sometimes merge the final NOT into one of the following +; insns. + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (and:SI (not:SI (match_operand:SI 1 "s_register_operand" "")) + (not:SI (match_operand:SI 2 "arm_rhs_operand" ""))) + (match_operand:SI 3 "arm_rhs_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 4) (and:SI (ior:SI (match_dup 1) (match_dup 2)) + (not:SI (match_dup 3)))) + (set (match_dup 0) (not:SI (match_dup 4)))] + "" +) + +(define_insn_and_split "*andsi_iorsi3_notsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + (and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")) + (not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))] + "TARGET_32BIT" + "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3" + "&& reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))] + "" + [(set_attr "length" "8") + (set_attr "ce_count" "2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +; ??? Are these four splitters still beneficial when the Thumb-2 bitfield +; insns are available? +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI 
(match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + + +;; Minimum and maximum insns + +(define_expand "smaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx || operands[2] == constm1_rtx) + { + /* No need for a clobber of the condition code register here. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMAX (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smax_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn "*smax_m1" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int -1)))] + "TARGET_32BIT" + "orr%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn_and_split "*arm_smax_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movlt\\t%0, %2 + ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_expand "sminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx) + { + /* No need for a clobber of the condition code register here. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMIN (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smin_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smin:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "and%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn_and_split "*arm_smin_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movge\\t%0, %2 + ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple,multiple")] +) + +(define_expand "umaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*arm_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movcc\\t%0, %2 + ; cmp\\t%1, %2\;movcs\\t%0, %1 + ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + 
(compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] +) + +(define_expand "uminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*arm_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movcs\\t%0, %2 + ; cmp\\t%1, %2\;movcc\\t%0, %1 + ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] +) + +(define_insn "*store_minmaxsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (match_operator:SI 3 "minmax_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && optimize_function_for_size_p (cfun)" + "* + operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, + operands[1], operands[2]); + output_asm_insn (\"cmp\\t%1, %2\", operands); + if (TARGET_THUMB2) + output_asm_insn (\"ite\t%d3\", operands); + output_asm_insn (\"str%d3\\t%1, %0\", operands); + output_asm_insn (\"str%D3\\t%2, %0\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "store1")] +) + +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. +(define_insn "*minmax_arithsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 4 "shiftable_operator" + [(match_operator:SI 5 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && !arm_restrict_it" + "* + { + enum rtx_code code = GET_CODE (operands[4]); + bool need_else; + + if (which_alternative != 0 || operands[3] != const0_rtx + || (code != PLUS && code != IOR && code != XOR)) + need_else = true; + else + need_else = false; + + operands[5] = gen_rtx_fmt_ee (minmax_code (operands[5]), SImode, + operands[2], operands[3]); + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (TARGET_THUMB2) + { + if (need_else) + output_asm_insn (\"ite\\t%d5\", operands); + else + output_asm_insn (\"it\\t%d5\", operands); + } + output_asm_insn (\"%i4%d5\\t%0, %1, %2\", operands); + if (need_else) + output_asm_insn (\"%i4%D5\\t%0, %1, %3\", operands); + return \"\"; + }" + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "multiple")] +) + +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. 
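+; Illustration only (not part of the original sources): in ARM state,
+; "r0 = r1 - smax (r2, r3)" can be split into something like
+;     cmp   r2, r3
+;     subge r0, r1, r2
+;     sublt r0, r1, r3
+; (Thumb-2 additionally wraps the conditional subtracts in an IT block.)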
+(define_insn_and_split "*minmax_arithsi_non_canon" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI + (match_operand:SI 1 "s_register_operand" "0,?Ts") + (match_operator:SI 4 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "Ts,Ts") + (match_operand:SI 3 "arm_rhs_operand" "TsI,TsI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1]) + && !(arm_restrict_it && CONST_INT_P (operands[3]))" + "#" + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + + (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (minus:SI (match_dup 1) + (match_dup 2)))) + (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (match_dup 6)))] + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = minmax_code (operands[4]); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, + operands[2], operands[3]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]); + if (CONST_INT_P (operands[3])) + operands[6] = plus_constant (SImode, operands[1], -INTVAL (operands[3])); + else + operands[6] = gen_rtx_MINUS (SImode, operands[1], operands[3]); + } + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "multiple")] +) + +(define_code_iterator SAT [smin smax]) +(define_code_iterator SATrev [smin smax]) +(define_code_attr SATlo [(smin "1") (smax "2")]) +(define_code_attr SAThi [(smin "2") (smax "1")]) + +(define_insn "*satsi_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (SAT:SI (SATrev:SI (match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) + (match_operand:SI 2 "const_int_operand" "i")))] + "TARGET_32BIT && arm_arch6 && != + && arm_sat_operator_match (operands[], operands[], NULL, NULL)" +{ + int mask; + bool signed_sat; + if (!arm_sat_operator_match (operands[], operands[], + &mask, &signed_sat)) + gcc_unreachable (); + + operands[1] = GEN_INT (mask); + if (signed_sat) + return "ssat%?\t%0, %1, %3"; + else + return "usat%?\t%0, %1, %3"; +} + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alus_imm")] +) + +(define_insn "*satsi__shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (SAT:SI (SATrev:SI (match_operator:SI 3 "sat_shift_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "const_int_operand" "i")]) + (match_operand:SI 1 "const_int_operand" "i")) + (match_operand:SI 2 "const_int_operand" "i")))] + "TARGET_32BIT && arm_arch6 && != + && arm_sat_operator_match (operands[], operands[], NULL, NULL)" +{ + int mask; + bool signed_sat; + if (!arm_sat_operator_match (operands[], operands[], + &mask, &signed_sat)) + gcc_unreachable (); + + operands[1] = GEN_INT (mask); + if (signed_sat) + return "ssat%?\t%0, %1, %4%S3"; + else + return "usat%?\t%0, %1, %4%S3"; +} + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "3") + (set_attr "type" "logic_shift_reg")]) + +;; Shift and rotation insns + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashift:DI (match_operand:DI 1 "s_register_operand" 
"") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + else + { + /* Only the NEON case can handle in-memory shift counts. */ + if (!reg_or_int_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. */ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. */ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_ashldi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashift:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashift:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsl\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. 
*/ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. */ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_ashrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (31); + " +) + +(define_insn "*thumb1_ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "asr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. */ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. 
*/ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_lshrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (lshiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32); + else + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2])); + operands[2] = reg; + } + " +) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (INTVAL (operands[2]) % 32); + } + else /* TARGET_THUMB1 */ + { + if (CONST_INT_P (operands [2])) + operands [2] = force_reg (SImode, operands[2]); + } + " +) + +(define_insn "*thumb1_rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "ror\\t%0, %0, %2" + [(set_attr "type" "shift_reg") + (set_attr "length" "2")] +) + +(define_insn "*arm_shiftsi3" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "0,l,r,r") + (match_operand:SI 2 "reg_or_int_operand" "l,M,M,r")]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 0);" + [(set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no") + (set_attr "length" "4") + (set_attr "shift" "1") + (set_attr "type" "alu_shift_reg,alu_shift_imm,alu_shift_imm,alu_shift_reg")] +) + +(define_insn "*shiftsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_op_dup 3 [(match_dup 1) (match_dup 2)]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr 
"type" "alus_shift_imm,alus_shift_reg")] +) + +(define_insn "*shiftsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "type" "shift_imm,shift_reg")] +) + +(define_insn "*not_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + "TARGET_32BIT" + "mvn%?\\t%0, %1%S3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +(define_insn "*not_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +(define_insn "*not_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +;; We don't really have extzv, but defining this using shifts helps +;; to reduce register pressure later on. + +(define_expand "extzv" + [(set (match_operand 0 "s_register_operand" "") + (zero_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { + HOST_WIDE_INT lshift = 32 - INTVAL (operands[2]) - INTVAL (operands[3]); + HOST_WIDE_INT rshift = 32 - INTVAL (operands[2]); + + if (arm_arch_thumb2) + { + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + + if (unaligned_access && MEM_P (operands[1]) + && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width + - bitpos; + + if (width == 32) + { + base_addr = adjust_address (operands[1], SImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); + } + else + { + rtx dest = operands[0]; + rtx tmp = gen_reg_rtx (SImode); + + /* We may get a paradoxical subreg here. Strip it off. 
*/ + if (GET_CODE (dest) == SUBREG + && GET_MODE (dest) == SImode + && GET_MODE (SUBREG_REG (dest)) == HImode) + dest = SUBREG_REG (dest); + + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) + FAIL; + + base_addr = adjust_address (operands[1], HImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); + emit_move_insn (gen_lowpart (SImode, dest), tmp); + } + DONE; + } + else if (s_register_operand (operands[1], GET_MODE (operands[1]))) + { + emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + else + FAIL; + } + + if (!s_register_operand (operands[1], GET_MODE (operands[1]))) + FAIL; + + operands[3] = GEN_INT (rshift); + + if (lshift == 0) + { + emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3])); + DONE; + } + + emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), + operands[3], gen_reg_rtx (SImode))); + DONE; + }" +) + +;; Helper for extzv, for the Thumb-1 register-shifts case. + +(define_expand "extzv_t1" + [(set (match_operand:SI 4 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 0 "s_register_operand" "") + (lshiftrt:SI (match_dup 4) + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1" + "") + +(define_expand "extv" + [(set (match_operand 0 "s_register_operand" "") + (sign_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "arm_arch_thumb2" +{ + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + + if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) + && (bitpos % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; + + if (width == 32) + { + base_addr = adjust_address (operands[1], SImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); + } + else + { + rtx dest = operands[0]; + rtx tmp = gen_reg_rtx (SImode); + + /* We may get a paradoxical subreg here. Strip it off. */ + if (GET_CODE (dest) == SUBREG + && GET_MODE (dest) == SImode + && GET_MODE (SUBREG_REG (dest)) == HImode) + dest = SUBREG_REG (dest); + + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) + FAIL; + + base_addr = adjust_address (operands[1], HImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadhis (tmp, base_addr)); + emit_move_insn (gen_lowpart (SImode, dest), tmp); + } + + DONE; + } + else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) + FAIL; + else if (GET_MODE (operands[0]) == SImode + && GET_MODE (operands[1]) == SImode) + { + emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + + FAIL; +}) + +; Helper to expand register forms of extv with the proper modes. + +(define_expand "extv_regsi" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ +}) + +; ARMv6+ unaligned load/store instructions (used for packed structure accesses). 
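+;;
+;; Editorial illustration (not part of the upstream source): a packed
+;; structure member access is the typical consumer of these unspecs.
+;; Assuming a hypothetical example such as
+;;
+;;   struct __attribute__((packed)) rec { char tag; int val; };
+;;   int get_val (struct rec *p) { return p->val; }
+;;
+;; a target with unaligned_access enabled can expand the misaligned word
+;; load through unaligned_loadsi and emit a single
+;;   ldr r0, [r0, #1] @ unaligned
+;; instead of reassembling the value from byte loads; the exact sequence
+;; depends on the selected architecture and options.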
+ +(define_insn "unaligned_loadsi" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "ldr%?\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load1")]) + +(define_insn "unaligned_loadhis" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (sign_extend:SI + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD)))] + "unaligned_access && TARGET_32BIT" + "ldr%(sh%)\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load_byte")]) + +(define_insn "unaligned_loadhiu" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (zero_extend:SI + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD)))] + "unaligned_access && TARGET_32BIT" + "ldr%(h%)\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load_byte")]) + +(define_insn "unaligned_storesi" + [(set (match_operand:SI 0 "memory_operand" "=Uw,m") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "str%?\t%1, %0\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "store1")]) + +(define_insn "unaligned_storehi" + [(set (match_operand:HI 0 "memory_operand" "=Uw,m") + (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "str%(h%)\t%1, %0\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "store1")]) + +;; Unaligned double-word load and store. +;; Split after reload into two unaligned single-word accesses. +;; It prevents lower_subreg from splitting some other aligned +;; double-word accesses too early. Used for internal memcpy. + +(define_insn_and_split "unaligned_loaddi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* If the first destination register overlaps with the base address, + swap the order in which the loads are emitted. 
*/ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx tmp = operands[1]; + operands[1] = operands[3]; + operands[3] = tmp; + tmp = operands[0]; + operands[0] = operands[2]; + operands[2] = tmp; + } + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "load2")]) + +(define_insn_and_split "unaligned_storedi" + [(set (match_operand:DI 0 "memory_operand" "=o,o") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "store2")]) + + +(define_insn "*extv_reg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "sbfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +(define_insn "extzv_t2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "ubfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + + +;; Division instructions +(define_insn "divsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (div:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_IDIV" + "sdiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "sdiv")] +) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (udiv:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_IDIV" + "udiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "udiv")] +) + + +;; Unary arithmetic insns + +(define_expand "negdi2" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + { + if (TARGET_NEON) + { + emit_insn (gen_negdi2_neon (operands[0], operands[1])); + DONE; + } + } +) + +;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1). +;; The first alternative allows the common case of a *full* overlap. 
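+;;
+;; Editorial sketch of the split result (assuming the usual %Q = low,
+;; %R = high subword ordering): negating r1:r0 into r3:r2 becomes
+;;
+;;   rsbs r2, r0, #0   @ negate low word, set the borrow
+;;   rsc  r3, r1, #0   @ high word consumes the borrow
+;;
+;; so a *partial* overlap, where the freshly written low word of the
+;; destination is also the still-needed high word of the source, must be
+;; rejected by the constraints below.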
+(define_insn_and_split "*arm_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (neg:DI (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb1_negdi2" + [(set (match_operand:DI 0 "register_operand" "=&l") + (neg:DI (match_operand:DI 1 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_expand "negsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_negsi2" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "rsb%?\\t%0, %1, #0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb1_negsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (neg:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "neg\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alu_imm")] +) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (neg:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "" +) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (neg:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +(define_insn_and_split "*zextendsidi_negsi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (zero_extend:DI (neg:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT" + "#" + "" + [(set (match_dup 2) + (neg:SI (match_dup 1))) + (set (match_dup 3) + (const_int 0))] + { + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[0]); + } + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +;; Negate an extended 32-bit value. +(define_insn_and_split "*negdi_extendsidi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (neg:DI (sign_extend:DI + (match_operand:SI 1 "s_register_operand" "l,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx low = gen_lowpart (SImode, operands[0]); + rtx high = gen_highpart (SImode, operands[0]); + + if (reg_overlap_mentioned_p (low, operands[1])) + { + /* Input overlaps the low word of the output. Use: + asr Rhi, Rin, #31 + rsbs Rlo, Rin, #0 + rsc Rhi, Rhi, #0 (thumb2: sbc Rhi, Rhi, Rhi, lsl #1). 
*/ + rtx cc_reg = gen_rtx_REG (CC_Cmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, operands[1], + GEN_INT (31)))); + + emit_insn (gen_subsi3_compare (low, const0_rtx, operands[1])); + if (TARGET_ARM) + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + const0_rtx, + high), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + else + { + rtx two_x = gen_rtx_ASHIFT (SImode, high, GEN_INT (1)); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + high, + two_x), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + } + } + else + { + /* No overlap, or overlap on high word. Use: + rsb Rlo, Rin, #0 + bic Rhi, Rlo, Rin + asr Rhi, Rhi, #31 + Flags not needed for this sequence. */ + emit_insn (gen_rtx_SET (VOIDmode, low, + gen_rtx_NEG (SImode, operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_AND (SImode, + gen_rtx_NOT (SImode, operands[1]), + low))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, high, + GEN_INT (31)))); + } + DONE; + } + [(set_attr "length" "12") + (set_attr "arch" "t2,*") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*negdi_zero_extendsidi" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0" + ;; Don't care what register is input to sbc, + ;; since we just just need to propagate the carry. + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] ;; length in thumb is 4 +) + +;; abssi2 doesn't really clobber the condition codes if a different register +;; is being set. To keep things simple, assume during rtl manipulations that +;; it does, but tell the final scan operator the truth. 
Similarly for +;; (neg (abs...)) + +(define_expand "abssi2" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + operands[2] = gen_rtx_SCRATCH (SImode); + else + operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); +") + +(define_insn_and_split "*arm_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO(operands[0]) == REGNO(operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsblt\\t%0, %0, #0 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (const_int 0))) + (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + DONE; + } + else + { + /* Emit the pattern: + alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + [(set (match_dup 0) + (xor:SI (match_dup 1) + (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) + (minus:SI (match_dup 0) + (ashiftrt:SI (match_dup 1) (const_int 31))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + DONE; + } + } + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + (set_attr "predicable" "no, yes") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb1_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (abs:SI (match_operand:SI 1 "s_register_operand" "l"))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO (operands[0]) == REGNO (operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_GT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1]))))); + } + else + { + /* Emit the pattern: + eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, 
+ operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + (set_attr "predicable" "no, yes") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb1_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "l")))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_expand "abssf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (abs:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "") + +(define_expand "absdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (abs:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "") + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "") + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +(define_insn_and_split "one_cmpldi2" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,?w") + (not:DI (match_operand:DI 1 "s_register_operand" " w, 0, r, w")))] + "TARGET_32BIT" + "@ + vmvn\t%P0, %P1 + # + # + vmvn\t%P0, %P1" + "TARGET_32BIT && reload_completed + && arm_general_register_operand (operands[0], DImode)" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 2) (not:SI (match_dup 3)))] + " + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "*,8,8,*") + (set_attr "predicable" "no,yes,yes,no") + (set_attr "type" "neon_move,multiple,multiple,neon_move") + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] +) + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (not:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "mvn%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*thumb1_one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (not:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "mvn\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*notsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_dup 1)))] + "TARGET_32BIT" 
+ "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*notsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "mvn_reg")] +) + +;; Fixed <--> Floating conversion insns + +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float:SF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float:DF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +;; Truncation insns + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* DFmode -> HFmode conversions have to go through SFmode. */ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +;; Zero and sign extension instructions. 
+
+(define_insn "zero_extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,w")
+        (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+					    "<qhs_zextenddi_cstr>")))]
+  "TARGET_32BIT <qhs_zextenddi_cond>"
+  "#"
+  [(set_attr "length" "8,4,8,8")
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
+   (set_attr "ce_count" "2")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "multiple,mov_reg,multiple,multiple")]
+)
+
+(define_insn "extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r,w")
+        (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+					    "<qhs_extenddi_cstr>")))]
+  "TARGET_32BIT <qhs_sextenddi_cond>"
+  "#"
+  [(set_attr "length" "8,4,8,8,8")
+   (set_attr "ce_count" "2")
+   (set_attr "shift" "1")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")
+   (set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")]
+)
+
+;; Splits for all extensions to DImode
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx lo_part = gen_lowpart (SImode, operands[0]);
+  enum machine_mode src_mode = GET_MODE (operands[1]);
+
+  if (REG_P (operands[0])
+      && !reg_overlap_mentioned_p (operands[0], operands[1]))
+    emit_clobber (operands[0]);
+  if (!REG_P (lo_part) || src_mode != SImode
+      || !rtx_equal_p (lo_part, operands[1]))
+    {
+      if (src_mode == SImode)
+        emit_move_insn (lo_part, operands[1]);
+      else
+        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+				gen_rtx_ZERO_EXTEND (SImode, operands[1])));
+      operands[1] = lo_part;
+    }
+  operands[0] = gen_highpart (SImode, operands[0]);
+  operands[1] = const0_rtx;
+})
+
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
+{
+  rtx lo_part = gen_lowpart (SImode, operands[0]);
+  enum machine_mode src_mode = GET_MODE (operands[1]);
+
+  if (REG_P (operands[0])
+      && !reg_overlap_mentioned_p (operands[0], operands[1]))
+    emit_clobber (operands[0]);
+
+  if (!REG_P (lo_part) || src_mode != SImode
+      || !rtx_equal_p (lo_part, operands[1]))
+    {
+      if (src_mode == SImode)
+        emit_move_insn (lo_part, operands[1]);
+      else
+        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+				gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+      operands[1] = lo_part;
+    }
+  operands[0] = gen_highpart (SImode, operands[0]);
+})
+
+(define_expand "zero_extendhisi2"
+  [(set (match_operand:SI 0 "s_register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_EITHER"
+{
+  if (TARGET_ARM && !arm_arch4 && MEM_P (operands[1]))
+    {
+      emit_insn (gen_movhi_bytes (operands[0], operands[1]));
+      DONE;
+    }
+  if (!arm_arch6 && !MEM_P (operands[1]))
+    {
+      rtx t = gen_lowpart (SImode, operands[1]);
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16)));
+      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (16)));
+      DONE;
+    }
+})
+
+(define_split
+  [(set (match_operand:SI 0 "s_register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "s_register_operand" "")))]
+  "!TARGET_THUMB2 && !arm_arch6"
+  [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+{
+  operands[2] = gen_lowpart (SImode, operands[1]);
+})
+
+(define_insn "*thumb1_zero_extendhisi2"
+  [(set (match_operand:SI 0 "register_operand"
"=l,l") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1" +{ + rtx mem; + + if (which_alternative == 0 && arm_arch6) + return "uxth\t%0, %1"; + if (which_alternative == 0) + return "#"; + + mem = XEXP (operands[1], 0); + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + + /* This can happen due to bugs in reload. */ + if (REG_P (a) && REGNO (a) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = a; + + output_asm_insn ("mov\t%0, %1", ops); + + XEXP (mem, 0) = operands[0]; + } + } + + return "ldrh\t%0, %1"; +} + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "extend,load_byte")] +) + +(define_insn "*arm_zero_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(h%)\\t%0, %1" + [(set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "extend,load_byte")] +) + +(define_insn "*arm_zero_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_ARM && !arm_arch6 && !MEM_P (operands[1])) + { + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, operands[1]), + GEN_INT (255))); + DONE; + } + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "s_register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); + if (TARGET_ARM) + { + emit_insn (gen_andsi3 (operands[0], operands[2], GEN_INT (255))); + DONE; + } +}) + +(define_insn "*thumb1_zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && !arm_arch6" + "@ + # + ldrb\\t%0, %1" + [(set_attr "length" "4,2") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "pool_range" "*,32")] +) + +(define_insn "*thumb1_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && arm_arch6" + "@ + uxtb\\t%0, %1 + ldrb\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "extend,load_byte")] +) + +(define_insn 
"*arm_zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && !arm_arch6" + "@ + # + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtab%?\\t%0, %2, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_shift_reg")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 0))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (!MEM_P (operands[1])) && ! BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 3))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (!MEM_P (operands[1])) && BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior_xor:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) + (zero_extend:SI + (match_operator 5 "subreg_lowpart_operator" + [(match_operand:SI 4 "s_register_operand" "")]))))] + "TARGET_32BIT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" + [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +) + +(define_insn "*compareqi_eq0" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "tst%?\\t%0, #255" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_imm")] +) + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_THUMB1) + { + emit_insn (gen_thumb1_extendhisi2 (operands[0], operands[1])); + DONE; + } + if (MEM_P (operands[1]) && TARGET_ARM && !arm_arch4) + { + emit_insn (gen_extendhisi2_mem (operands[0], operands[1])); + DONE; + } + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (16))); + DONE; + } +}) + 
+(define_split + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (match_scratch:SI 2 ""))])] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +;; We used to have an early-clobber on the scratch register here. +;; However, there's a bug somewhere in reload which means that this +;; can be partially ignored during spill allocation if the memory +;; address also needs reloading; this causes us to die later on when +;; we try to verify the operands. Fortunately, we don't really need +;; the early-clobber: we can always use operand 0 if operand 2 +;; overlaps the address. +(define_insn "thumb1_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m"))) + (clobber (match_scratch:SI 2 "=X,l"))] + "TARGET_THUMB1" + "* + { + rtx ops[4]; + rtx mem; + + if (which_alternative == 0 && !arm_arch6) + return \"#\"; + if (which_alternative == 0) + return \"sxth\\t%0, %1\"; + + mem = XEXP (operands[1], 0); + + /* This code used to try to use 'V', and fix the address only if it was + offsettable, but this fails for e.g. REG+48 because 48 is outside the + range of QImode offsets, and offsettable_address_p does a QImode + address check. */ + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == LABEL_REF) + return \"ldr\\t%0, %1\"; + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + rtx b = XEXP (mem, 1); + + if (GET_CODE (a) == LABEL_REF + && CONST_INT_P (b)) + return \"ldr\\t%0, %1\"; + + if (REG_P (b)) + return \"ldrsh\\t%0, %1\"; + + ops[1] = a; + ops[2] = b; + } + else + { + ops[1] = mem; + ops[2] = const0_rtx; + } + + gcc_assert (REG_P (ops[1])); + + ops[0] = operands[0]; + if (reg_mentioned_p (operands[2], ops[1])) + ops[3] = ops[0]; + else + ops[3] = operands[2]; + output_asm_insn (\"mov\\t%3, %2\;ldrsh\\t%0, [%1, %3]\", ops); + return \"\"; + }" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "extend,load_byte") + (set_attr "pool_range" "*,1018")] +) + +;; This pattern will only be used when ldsh is not available +(define_expand "extendhisi2_mem" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 7))) + (set (match_dup 6) (ashift:SI (match_dup 4) (const_int 24))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashiftrt:SI (match_dup 6) (const_int 16)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, + plus_constant (Pmode, addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) 
(const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +(define_insn "*arm_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sh%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +;; ??? Check Thumb-2 pool range +(define_insn "*arm_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_32BIT && arm_arch6" + "@ + sxth%?\\t%0, %1 + ldr%(sh%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg")] +) + +(define_expand "extendqihi2" + [(set (match_dup 2) + (ashift:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "") + (const_int 24))) + (set (match_operand:HI 0 "s_register_operand" "") + (ashiftrt:SI (match_dup 2) + (const_int 24)))] + "TARGET_ARM" + " + { + if (arm_arch4 && MEM_P (operands[1])) + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_SIGN_EXTEND (HImode, operands[1]))); + DONE; + } + if (!s_register_operand (operands[1], QImode)) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_reg_rtx (SImode); + }" +) + +(define_insn "*arm_extendqihi_insn" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "arm_extendqisi_mem_op" "Uq")))] + "TARGET_ARM && arm_arch4" + "ldr%(sb%)\\t%0, %1" + [(set_attr "type" "load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "256") + (set_attr "neg_pool_range" "244")] +) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "")))] + "TARGET_EITHER" +{ + if (!arm_arch4 && MEM_P (operands[1])) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); +}) + +(define_insn "*arm_extendqisi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sb%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" 
"alu_shift_reg,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI + (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI (match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtab%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (sign_extend:SI (match_dup 3)))] +{ + rtx addr = XEXP (operands[1], 0); + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + /* No split necessary. */ + FAIL; + + if (GET_CODE (addr) == PLUS + && !REG_P (XEXP (addr, 0)) && !REG_P (XEXP (addr, 1))) + FAIL; + + if (reg_overlap_mentioned_p (operands[0], addr)) + { + rtx t = gen_lowpart (QImode, operands[0]); + emit_move_insn (t, operands[1]); + emit_insn (gen_thumb1_extendqisi2 (operands[0], t)); + DONE; + } + + if (REG_P (addr)) + { + addr = gen_rtx_PLUS (Pmode, addr, operands[0]); + operands[2] = const0_rtx; + } + else if (GET_CODE (addr) != PLUS) + FAIL; + else if (REG_P (XEXP (addr, 0))) + { + operands[2] = XEXP (addr, 1); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 0), operands[0]); + } + else + { + operands[2] = XEXP (addr, 0); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 1), operands[0]); + } + + operands[3] = change_address (operands[1], QImode, addr); +}) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_dup 0) (match_operand 1 "const_int_operand"))) + (set (match_operand:SI 2 "register_operand" "") (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (sign_extend:SI (match_operand:QI 4 "memory_operand" "")))] + "TARGET_THUMB1 + && GET_CODE (XEXP (operands[4], 0)) == PLUS + && rtx_equal_p (operands[0], XEXP (XEXP (operands[4], 0), 0)) + && rtx_equal_p (operands[2], XEXP (XEXP (operands[4], 0), 1)) + && (peep2_reg_dead_p (3, operands[0]) + || rtx_equal_p (operands[0], operands[3])) + && (peep2_reg_dead_p (3, operands[2]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (sign_extend:SI (match_dup 4)))] +{ + rtx addr = gen_rtx_PLUS (Pmode, operands[0], operands[2]); + operands[4] = change_address (operands[4], QImode, addr); +}) + +(define_insn "thumb1_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,V,m")))] + "TARGET_THUMB1" +{ + rtx addr; + + if (which_alternative == 0 && arm_arch6) + return "sxtb\\t%0, %1"; + if (which_alternative == 0) + return "#"; + + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + return "ldrsb\\t%0, %1"; + + return "#"; +} + [(set_attr_alternative 
"length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 2) + (if_then_else (eq_attr "is_arch6" "yes") + (const_int 4) (const_int 6))]) + (set_attr "type" "extend,load_byte,load_byte")] +) + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* HFmode -> DFmode conversions have to go through SFmode. */ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) + +;; Move insns (including loads and stores) + +;; XXX Just some ideas about movti. +;; I don't think these are a good idea on the arm, there just aren't enough +;; registers +;;(define_expand "loadti" +;; [(set (match_operand:TI 0 "s_register_operand" "") +;; (mem:TI (match_operand:SI 1 "address_operand" "")))] +;; "" "") + +;;(define_expand "storeti" +;; [(set (mem:TI (match_operand:TI 0 "address_operand" "")) +;; (match_operand:TI 1 "s_register_operand" ""))] +;; "" "") + +;;(define_expand "movti" +;; [(set (match_operand:TI 0 "general_operand" "") +;; (match_operand:TI 1 "general_operand" ""))] +;; "" +;; " +;;{ +;; rtx insn; +;; +;; if (MEM_P (operands[0]) && MEM_P (operands[1])) +;; operands[1] = copy_to_reg (operands[1]); +;; if (MEM_P (operands[0])) +;; insn = gen_storeti (XEXP (operands[0], 0), operands[1]); +;; else if (MEM_P (operands[1])) +;; insn = gen_loadti (operands[0], XEXP (operands[1], 0)); +;; else +;; FAIL; +;; +;; emit_insn (insn); +;; DONE; +;;}") + +;; Recognize garbage generated above. 
+ +;;(define_insn "" +;; [(set (match_operand:TI 0 "general_operand" "=r,r,r,<,>,m") +;; (match_operand:TI 1 "general_operand" "<,>,m,r,r,r"))] +;; "" +;; "* +;; { +;; register mem = (which_alternative < 3); +;; register const char *template; +;; +;; operands[mem] = XEXP (operands[mem], 0); +;; switch (which_alternative) +;; { +;; case 0: template = \"ldmdb\\t%1!, %M0\"; break; +;; case 1: template = \"ldmia\\t%1!, %M0\"; break; +;; case 2: template = \"ldmia\\t%1, %M0\"; break; +;; case 3: template = \"stmdb\\t%0!, %M1\"; break; +;; case 4: template = \"stmia\\t%0!, %M1\"; break; +;; case 5: template = \"stmia\\t%0, %M1\"; break; +;; } +;; output_asm_insn (template, operands); +;; return \"\"; +;; }") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (DImode, operands[1]); + } + " +) + +(define_insn "*arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))] + "TARGET_32BIT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && !TARGET_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands, true, NULL); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "multiple,multiple,multiple,load2,store2") + (set_attr "arm_pool_range" "*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1004,*") + (set_attr "thumb2_pool_range" "*,*,*,4094,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_32BIT + && reload_completed + && (arm_const_double_inline_cost (operands[1]) + <= arm_max_const_double_inline_cost ())" + [(const_int 0)] + " + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_lowpart (SImode, operands[1])), + gen_lowpart (SImode, operands[0]), NULL_RTX, 0); + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_highpart_mode (SImode, + GET_MODE (operands[0]), + operands[1])), + gen_highpart (SImode, operands[0]), NULL_RTX, 0); + DONE; + " +) + +; If optimizing for size, or if we have load delay slots, then +; we want to split the constant into two separate operations. +; In both cases this may split a trivial part into a single data op +; leaving a single complex constant to load. We can also get longer +; offsets in a LDR which means we get better chances of sharing the pool +; entries. Finally, we can normally do a better job of scheduling +; LDR instructions than we can with LDM. +; This pattern will only match if the one above did not. 
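The splitters around this comment rely on nothing more exotic than taking the low and high 32-bit words of the 64-bit constant and materialising each one as an ordinary SImode constant. A minimal standalone sketch of that decomposition, in plain C and purely illustrative rather than taken from the compiler sources:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: decompose a 64-bit constant into the two 32-bit
   words that the DImode splits load independently.  Each word is then
   built with its own mov/mvn/orr sequence or literal-pool load, just
   like any other SImode constant.  */
int main (void)
{
  uint64_t value = 0x0000001200ff00ffULL;       /* arbitrary example */
  uint32_t lo = (uint32_t) (value & 0xffffffffULL);
  uint32_t hi = (uint32_t) (value >> 32);

  printf ("low word  = 0x%08x\n", lo);          /* 0x00ff00ff */
  printf ("high word = 0x%08x\n", hi);          /* 0x00000012 */
  return 0;
}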
+(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_ARM && reload_completed + && arm_const_double_by_parts (operands[1])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart_mode (SImode, GET_MODE (operands[0]), + operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + " +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "arm_general_register_operand" ""))] + "TARGET_EITHER && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* Handle a partial overlap. */ + if (rtx_equal_p (operands[0], operands[3])) + { + rtx tmp0 = operands[0]; + rtx tmp1 = operands[1]; + + operands[0] = operands[2]; + operands[1] = operands[3]; + operands[2] = tmp0; + operands[3] = tmp1; + } + " +) + +;; We can't actually do base+index doubleword loads if the index and +;; destination overlap. Split here so that we at least have chance to +;; schedule. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (mem:DI (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_LDRD + && reg_overlap_mentioned_p (operands[0], operands[1]) + && reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 4) + (plus:SI (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (mem:DI (match_dup 4)))] + " + operands[4] = gen_rtx_REG (SImode, REGNO(operands[0])); + " +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdf_insn pattern. +;;; ??? The 'i' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. 
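Both the register-to-register split above and the Thumb-1 DImode move below have to order their two word moves so that a partially overlapping register pair is not clobbered before it is read. A small C model of that ordering decision, illustrative only and not lifted from the compiler:

#include <stdio.h>

/* Fake 32-bit register file; a DImode value occupies two consecutive
   "registers", low word first.  */
static unsigned int regs[8];

/* Copy the pair starting at src_lo into the pair starting at dst_lo.
   If the low destination is the high source, copying low-first would
   overwrite a word still to be read, so the high half goes first.  */
static void move_di_pair (int dst_lo, int src_lo)
{
  int dst_hi = dst_lo + 1, src_hi = src_lo + 1;

  if (dst_lo == src_hi)
    {
      regs[dst_hi] = regs[src_hi];
      regs[dst_lo] = regs[src_lo];
    }
  else
    {
      regs[dst_lo] = regs[src_lo];
      regs[dst_hi] = regs[src_hi];
    }
}

int main (void)
{
  regs[1] = 0x11111111; regs[2] = 0x22222222;
  move_di_pair (2, 1);   /* destination r2:r3 overlaps source r1:r2 */
  printf ("r2=%08x r3=%08x\n", regs[2], regs[3]);   /* 11111111 22222222 */
  return 0;
}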
+(define_insn "*thumb1_movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,l,>,l, m,*r") + (match_operand:DI 1 "general_operand" "l, I,J,>,l,mi,l,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + { + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"mov\\t%Q0, %1\;mov\\t%R0, #0\"; + case 2: + operands[1] = GEN_INT (- INTVAL (operands[1])); + return \"mov\\t%Q0, %1\;neg\\t%Q0, %Q0\;asr\\t%R0, %Q0, #31\"; + case 3: + return \"ldmia\\t%1, {%0, %H0}\"; + case 4: + return \"stmia\\t%0, {%1, %H1}\"; + case 5: + return thumb_load_double_from_address (operands); + case 6: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (Pmode, XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 7: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + }" + [(set_attr "length" "4,4,6,2,2,6,4,4") + (set_attr "type" "multiple,multiple,multiple,load2,store2,load2,store2,multiple") + (set_attr "pool_range" "*,*,*,*,*,1018,*,*")] +) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_EITHER" + " + { + rtx base, offset, tmp; + + if (TARGET_32BIT) + { + /* Everything except mem = const or mem = mem can be done easily. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (SImode, operands[1]); + if (arm_general_register_operand (operands[0], SImode) + && CONST_INT_P (operands[1]) + && !(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1])))) + { + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, + optimize && can_create_pseudo_p ()); + DONE; + } + } + else /* TARGET_THUMB1... */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (SImode, operands[1]); + } + } + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (operands[1], &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + { + tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + emit_move_insn (tmp, base); + emit_insn (gen_addsi3 (operands[0], tmp, offset)); + DONE; + } + } + + /* Recognize the case where operand[1] is a reference to thread-local + data and load its address to a register. */ + if (arm_tls_referenced_p (operands[1])) + { + rtx tmp = operands[1]; + rtx addend = NULL; + + if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) + { + addend = XEXP (XEXP (tmp, 0), 1); + tmp = XEXP (XEXP (tmp, 0), 0); + } + + gcc_assert (GET_CODE (tmp) == SYMBOL_REF); + gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0); + + tmp = legitimize_tls_address (tmp, + !can_create_pseudo_p () ? operands[0] : 0); + if (addend) + { + tmp = gen_rtx_PLUS (SImode, tmp, addend); + tmp = force_operand (tmp, operands[0]); + } + operands[1] = tmp; + } + else if (flag_pic + && (CONSTANT_P (operands[1]) + || symbol_mentioned_p (operands[1]) + || label_mentioned_p (operands[1]))) + operands[1] = legitimize_pic_address (operands[1], SImode, + (!can_create_pseudo_p () + ? 
operands[0] + : 0)); + } + " +) + +;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. +(define_insn "*arm_movt" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "i")))] + "arm_arch_thumb2" + "movt%?\t%0, #:upper16:%c2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") + (set_attr "type" "mov_imm")] +) + +(define_insn "*arm_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] + "TARGET_ARM && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0" + [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*")] +) + +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_32BIT + && (!(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1]))))" + [(clobber (const_int 0))] + " + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, 0); + DONE; + " +) + +;; A normal way to do (symbol + offset) requires three instructions at least +;; (depends on how big the offset is) as below: +;; movw r0, #:lower16:g +;; movw r0, #:upper16:g +;; adds r0, #4 +;; +;; A better way would be: +;; movw r0, #:lower16:g+4 +;; movw r0, #:upper16:g+4 +;; +;; The limitation of this way is that the length of offset should be a 16-bit +;; signed value, because current assembler only supports REL type relocation for +;; such case. If the more powerful RELA type is supported in future, we should +;; update this pattern to go with better way. +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (const:SI (plus:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" ""))))] + "TARGET_THUMB2 + && arm_disable_literal_pool + && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + [(clobber (const_int 0))] + " + int offset = INTVAL (operands[2]); + + if (offset < -0x8000 || offset > 0x7fff) + { + arm_emit_movpair (operands[0], operands[1]); + emit_insn (gen_rtx_SET (SImode, operands[0], + gen_rtx_PLUS (SImode, operands[0], operands[2]))); + } + else + { + rtx op = gen_rtx_CONST (SImode, + gen_rtx_PLUS (SImode, operands[1], operands[2])); + arm_emit_movpair (operands[0], op); + } + " +) + +;; Split symbol_refs at the later stage (after cprop), instead of generating +;; movt/movw pair directly at expand. Otherwise corresponding high_sum +;; and lo_sum would be merged back into memory load at cprop. However, +;; if the default is to prefer movt/movw rather than a load from the constant +;; pool, the performance is better. 
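To make the offset-folding limit described above concrete: the #:lower16:/#:upper16: pair (movw for the bottom half, movt for the top half) can only carry a signed 16-bit addend under REL relocations, which is exactly the -0x8000..0x7fff test in the split. A standalone sketch of that decision, illustrative C rather than compiler code:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: fold a symbol offset into the movw/movt addend
   when it fits in signed 16 bits, otherwise materialise the bare
   symbol address and add the offset separately.  */
static void materialise (uint32_t sym_addr, int32_t offset)
{
  if (offset < -0x8000 || offset > 0x7fff)
    {
      printf ("movw #0x%04x ; movt #0x%04x ; add #%d\n",
              (unsigned) (sym_addr & 0xffff),
              (unsigned) (sym_addr >> 16), (int) offset);
    }
  else
    {
      uint32_t target = sym_addr + (uint32_t) offset;
      printf ("movw #0x%04x ; movt #0x%04x\n",
              (unsigned) (target & 0xffff),
              (unsigned) (target >> 16));
    }
}

int main (void)
{
  materialise (0x00020040, 4);         /* folded into the relocations */
  materialise (0x00020040, 0x12000);   /* too big: explicit add */
  return 0;
}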
+(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_32BIT + && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !target_word_relocations + && !arm_tls_referenced_p (operands[1])" + [(clobber (const_int 0))] +{ + arm_emit_movpair (operands[0], operands[1]); + DONE; +}) + +(define_insn "*thumb1_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k") + (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*l*h*k"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov %0, %1 + mov %0, %1 + # + # + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1" + [(set_attr "length" "2,2,4,4,2,2,2,2,2") + (set_attr "type" "mov_reg,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg") + (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*") + (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (neg:SI (match_dup 2)))] + " + { + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] + " + { + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + break; + + /* Don't split if the shift is zero. */ + if (i == 0) + FAIL; + + operands[1] = GEN_INT (val >> i); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + operands[3] = GEN_INT (i); + }" +) + +;; For thumb1 split imm move [256-510] into mov [1-255] and add #255 +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_Pe (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 3)))] + " + { + operands[1] = GEN_INT (INTVAL (operands[1]) - 255); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + operands[3] = GEN_INT (255); + }" +) + +;; When generating pic, we need to load the symbol offset into a register. +;; So that the optimizer does not confuse this with a normal symbol load +;; we use an unspec. The offset will be loaded from a constant pool entry, +;; since that is the only type of relocation we can use. + +;; Wrap calculation of the whole PIC address in a single pattern for the +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of +;; a PIC address involves two loads from memory, so we want to CSE it +;; as often as possible. +;; This pattern will be split into one of the pic_load_addr_* patterns +;; and a move after GCSE optimizations. +;; +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. 
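As the comment above says, the wrapped calculation is conceptually two dependent loads: the GOT offset comes out of the literal pool, and the symbol's address then comes out of the GOT slot at pic_register plus that offset. A rough, purely conceptual C model of the data flow (all names here are invented for illustration, nothing is GCC or runtime API):

#include <stdint.h>
#include <stddef.h>

/* Conceptual model only: literal_pool[pool_index] holds the byte
   offset of the symbol's GOT slot, got_base plays the role of the PIC
   register, and the GOT slot holds the symbol's final address.  */
static uint32_t
calculate_pic_address_model (const uint32_t *literal_pool, size_t pool_index,
                             const uint8_t *got_base)
{
  uint32_t got_offset = literal_pool[pool_index];     /* pic_load_addr_*  */
  const uint32_t *slot = (const uint32_t *) (got_base + got_offset);
  return *slot;                                       /* load from the GOT */
}

int main (void)
{
  uint32_t got[4] = { 0, 0, 0x00030000u, 0 };   /* slot 2 holds the address */
  uint32_t pool[1] = { 8 };                     /* byte offset of slot 2 */
  return calculate_pic_address_model (pool, 0,
                                      (const uint8_t *) got) != 0x00030000u;
}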
+(define_expand "calculate_pic_address" + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" +) + +;; Split calculate_pic_address into pic_load_addr_* and a move. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" +) + +;; operand1 is the memory address to go into +;; pic_load_addr_32bit. +;; operand2 is the PIC label to be emitted +;; from pic_add_dot_plus_eight. +;; We do this to allow hoisting of the entire insn. +(define_insn_and_split "pic_load_addr_unified" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,l") + (unspec:SI [(match_operand:SI 1 "" "mX,mX,mX") + (match_operand:SI 2 "" "")] + UNSPEC_PIC_UNIFIED))] + "flag_pic" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3) + (match_dup 2)] UNSPEC_PIC_BASE))] + "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);" + [(set_attr "type" "load1,load1,load1") + (set_attr "pool_range" "4096,4094,1022") + (set_attr "neg_pool_range" "4084,0,0") + (set_attr "arch" "a,t2,t1") + (set_attr "length" "8,6,4")] +) + +;; The rather odd constraints on the following are to force reload to leave +;; the insn alone, and to force the minipool generation pass to then move +;; the GOT symbol to memory. + +(define_insn "pic_load_addr_32bit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_32BIT && flag_pic" + "ldr%?\\t%0, %1" + [(set_attr "type" "load1") + (set (attr "pool_range") + (if_then_else (eq_attr "is_thumb" "no") + (const_int 4096) + (const_int 4094))) + (set (attr "neg_pool_range") + (if_then_else (eq_attr "is_thumb" "no") + (const_int 4084) + (const_int 0)))] +) + +(define_insn "pic_load_addr_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_THUMB1 && flag_pic" + "ldr\\t%0, %1" + [(set_attr "type" "load1") + (set (attr "pool_range") (const_int 1018))] +) + +(define_insn "pic_add_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0") + (const_int 4) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_THUMB" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add\\t%0, %|pc\"; + " + [(set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "pic_add_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add%?\\t%0, %|pc, %1\"; + " + [(set_attr "predicable" "yes") + (set_attr "type" "alu_reg")] +) + +(define_insn "tls_load_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" 
"")] + UNSPEC_PIC_BASE)))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\"; + " + [(set_attr "predicable" "yes") + (set_attr "type" "load1")] +) + +;; PIC references to local variables can generate pic_add_dot_plus_eight +;; followed by a load. These sequences can be crunched down to +;; tls_load_dot_plus_eight by a peephole. + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 3 "register_operand" "") + (const_int 8) + (match_operand 1 "" "")] + UNSPEC_PIC_BASE)) + (set (match_operand:SI 2 "arm_general_register_operand" "") + (mem:SI (match_dup 0)))] + "TARGET_ARM && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (mem:SI (unspec:SI [(match_dup 3) + (const_int 8) + (match_dup 1)] + UNSPEC_PIC_BASE)))] + "" +) + +(define_insn "pic_offset_arm" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "" "X")] + UNSPEC_PIC_OFFSET))))] + "TARGET_VXWORKS_RTP && TARGET_ARM && flag_pic" + "ldr%?\\t%0, [%1,%2]" + [(set_attr "type" "load1")] +) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "flag_pic" + " +{ + /* r3 is clobbered by set/longjmp, so we can use it as a scratch + register. */ + if (arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (1UL << 3); + DONE; +}") + +;; If copying one reg to another we can set the condition codes according to +;; its value. Such a move is common after a return from subroutine and the +;; result is being tested against zero. + +(define_insn "*movsi_compare0" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "0,r") + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_dup 1))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #0 + sub%.\\t%0, %1, #0" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm")] +) + +;; Subroutine to store a half word from a register into memory. +;; Operand 0 is the source register (HImode) +;; Operand 1 is the destination address in a register (SImode) + +;; In both this routine and the next, we must be careful not to spill +;; a memory address of reg+large_const into a separate PLUS insn, since this +;; can generate unrecognizable rtl. 
+ +(define_expand "storehi" + [;; store the low byte + (set (match_operand 1 "" "") (match_dup 3)) + ;; extract the high byte + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + ;; store the high byte + (set (match_dup 4) (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op1 = replace_equiv_address (operands[1], force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +(define_expand "storehi_bigend" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + (set (match_operand 1 "" "") (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op1 = replace_equiv_address (op1, force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +;; Subroutine to store a half word integer constant into memory. +(define_expand "storeinthi" + [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (set (match_dup 3) (match_dup 2))] + "TARGET_ARM" + " + { + HOST_WIDE_INT value = INTVAL (operands[1]); + rtx addr = XEXP (operands[0], 0); + rtx op0 = operands[0]; + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op0 = replace_equiv_address (op0, force_reg (SImode, addr)); + + operands[1] = gen_reg_rtx (SImode); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movsi (operands[1], GEN_INT ((value >> 8) & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT (value & 255))); + } + } + else + { + emit_insn (gen_movsi (operands[1], GEN_INT (value & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT ((value >> 8) & 255))); + } + } + + operands[3] = adjust_address (op0, QImode, 1); + operands[0] = adjust_address (operands[0], QImode, 0); + operands[2] = gen_lowpart (QImode, operands[2]); + operands[1] = gen_lowpart (QImode, operands[1]); + }" +) + +(define_expand "storehi_single_op" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_32BIT && arm_arch4" + " + if (!s_register_operand (operands[1], HImode)) + operands[1] = copy_to_mode_reg (HImode, operands[1]); + " +) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_ARM) + { + if (can_create_pseudo_p ()) + { + if (MEM_P (operands[0])) + { + if (arm_arch4) + { + emit_insn (gen_storehi_single_op (operands[0], operands[1])); + DONE; + } + if (CONST_INT_P (operands[1])) 
+ emit_insn (gen_storeinthi (operands[0], operands[1])); + else + { + if (MEM_P (operands[1])) + operands[1] = force_reg (HImode, operands[1]); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_storehi_bigend (operands[1], operands[0])); + else + emit_insn (gen_storehi (operands[1], operands[0])); + } + DONE; + } + /* Sign extend a constant, and keep it in an SImode reg. */ + else if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + /* If the constant is already valid, leave it alone. */ + if (!const_ok_for_arm (val)) + { + /* If setting all the top bits will make the constant + loadable in a single instruction, then set them. + Otherwise, sign extend the number. */ + + if (const_ok_for_arm (~(val | ~0xffff))) + val |= ~0xffff; + else if (val & 0x8000) + val |= ~0xffff; + } + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + else if (arm_arch4 && optimize && can_create_pseudo_p () + && MEM_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + else if (!arm_arch4) + { + if (MEM_P (operands[1])) + { + rtx base; + rtx offset = const0_rtx; + rtx reg = gen_reg_rtx (SImode); + + if ((REG_P (base = XEXP (operands[1], 0)) + || (GET_CODE (base) == PLUS + && (CONST_INT_P (offset = XEXP (base, 1))) + && ((INTVAL(offset) & 1) != 1) + && REG_P (base = XEXP (base, 0)))) + && REGNO_POINTER_ALIGN (REGNO (base)) >= 32) + { + rtx new_rtx; + + new_rtx = widen_memory_access (operands[1], SImode, + ((INTVAL (offset) & ~3) + - INTVAL (offset))); + emit_insn (gen_movsi (reg, new_rtx)); + if (((INTVAL (offset) & 2) != 0) + ^ (BYTES_BIG_ENDIAN ? 1 : 0)) + { + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg2, reg, GEN_INT (16))); + reg = reg2; + } + } + else + emit_insn (gen_movhi_bytes (reg, operands[1])); + + operands[1] = gen_lowpart (HImode, reg); + } + } + } + /* Handle loading a large integer during reload. */ + else if (CONST_INT_P (operands[1]) + && !const_ok_for_arm (INTVAL (operands[1])) + && !const_ok_for_arm (~INTVAL (operands[1]))) + { + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + else if (TARGET_THUMB2) + { + /* Thumb-2 can do everything except mem=mem and mem=const easily. */ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (HImode, operands[1]); + /* Zero extend a constant, and keep it in an SImode reg. */ + else if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + } + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? 
This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. */ + if (MEM_P (operands[0]) + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + + if (MEM_P (operands[1]) + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + if (MEM_P (operands[0])) + operands[1] = force_reg (HImode, operands[1]); + } + else if (CONST_INT_P (operands[1]) + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + " +) + +(define_insn "*thumb1_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:HI 1 "general_operand" "l,m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "* + switch (which_alternative) + { + case 0: return \"add %0, %1, #0\"; + case 2: return \"strh %1, %0\"; + case 3: return \"mov %0, %1\"; + case 4: return \"mov %0, %1\"; + case 5: return \"mov %0, %1\"; + default: gcc_unreachable (); + case 1: + /* The stack pointer can end up being taken as an index register. + Catch this case here and deal with it. 
*/ + if (GET_CODE (XEXP (operands[1], 0)) == PLUS + && REG_P (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (XEXP (XEXP (operands[1], 0), 0)) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = XEXP (XEXP (operands[1], 0), 0); + + output_asm_insn (\"mov %0, %1\", ops); + + XEXP (XEXP (operands[1], 0), 0) = operands[0]; + + } + return \"ldrh %0, %1\"; + }" + [(set_attr "length" "2,4,2,2,2,2") + (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + + +(define_expand "movhi_bytes" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 6))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashift:SI (match_dup 4) (const_int 8)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, + plus_constant (Pmode, addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_expand "movhi_bigend" + [(set (match_dup 2) + (rotate:SI (subreg:SI (match_operand:HI 1 "memory_operand" "") 0) + (const_int 16))) + (set (match_dup 3) + (ashiftrt:SI (match_dup 2) (const_int 16))) + (set (match_operand:HI 0 "s_register_operand" "") + (match_dup 4))] + "TARGET_ARM" + " + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_lowpart (HImode, operands[3]); + " +) + +;; Pattern to recognize insn generated default case above +(define_insn "*movhi_insn_arch4" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + "TARGET_ARM + && arm_arch4 + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,244") + (set_attr_alternative "type" + [(if_then_else (match_operand 1 "const_int_operand" "") + (const_string "mov_imm" ) + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "store1") + (const_string "load1")])] +) + +(define_insn "*movhi_bytes" + [(set (match_operand:HI 0 "s_register_operand" "=r,r,r") + (match_operand:HI 1 "arm_rhs_operand" "I,r,K"))] + "TARGET_ARM" + "@ + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi" + [(set_attr "predicable" "yes") + (set_attr "type" "mov_imm,mov_reg,mvn_imm")] +) + +(define_expand "thumb_movhi_clobber" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "TARGET_THUMB1" + " + if (strict_memory_address_p (HImode, XEXP (operands[0], 0)) + && REGNO (operands[1]) <= LAST_LO_REGNUM) + { + emit_insn (gen_movhi (operands[0], operands[1])); + DONE; + } + /* XXX Fixme, need to handle other cases here as well. 
*/ + gcc_unreachable (); + " +) + +;; We use a DImode scratch because we may occasionally need an additional +;; temporary if the address isn't offsettable -- push_reload doesn't seem +;; to take any notice of the "o" constraints on reload_memory_operand operand. +(define_expand "reload_outhi" + [(parallel [(match_operand:HI 0 "arm_reload_memory_operand" "=o") + (match_operand:HI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "=&l")])] + "TARGET_EITHER" + "if (TARGET_ARM) + arm_reload_out_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; + " +) + +(define_expand "reload_inhi" + [(parallel [(match_operand:HI 0 "s_register_operand" "=r") + (match_operand:HI 1 "arm_reload_memory_operand" "o") + (match_operand:DI 2 "s_register_operand" "=&r")])] + "TARGET_EITHER" + " + if (TARGET_ARM) + arm_reload_in_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; +") + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "TARGET_EITHER" + " + /* Everything except mem = const or mem = mem can be done easily */ + + if (can_create_pseudo_p ()) + { + if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + /* For thumb we want an unsigned immediate, then we are more likely + to be able to use a movs insn. */ + if (TARGET_THUMB) + operands[1] = GEN_INT (INTVAL (operands[1]) & 255); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (TARGET_THUMB) + { + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. */ + if (MEM_P (operands[0]) + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + if (MEM_P (operands[1]) + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + } + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (MEM_P (operands[0])) + operands[1] = force_reg (QImode, operands[1]); + } + else if (TARGET_THUMB + && CONST_INT_P (operands[1]) + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. 
*/ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + " +) + +(define_insn "*arm_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m") + (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0 + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" + [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load1,store1,load1,store1") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") + (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") + (set_attr "length" "2,4,4,2,4,2,2,4,4")] +) + +(define_insn "*thumb1_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + add\\t%0, %1, #0 + ldrb\\t%0, %1 + strb\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alu_imm,load1,store1,mov_reg,mov_imm,mov_imm") + (set_attr "pool_range" "*,32,*,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) && !arm_restrict_it + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,mov_reg,multiple") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (MEM_P 
(operands[1])); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))) + { + /* Constant pool entry. */ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "mov_reg,load1,store1,mov_reg,mov_reg") + (set_attr "pool_range" "*,1018,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (SFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (SFmode, operands[1]); + } + } + " +) + +;; Transform a floating-point move of a constant into a core register into +;; an SImode operation. +(define_split + [(set (match_operand:SF 0 "arm_general_register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "TARGET_EITHER + && reload_completed + && CONST_DOUBLE_P (operands[1])" + [(set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_lowpart (SImode, operands[1]); + if (operands[2] == 0 || operands[3] == 0) + FAIL; + " +) + +(define_insn "*arm_movsf_soft_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:SF 1 "general_operand" "r,mE,r"))] + "TARGET_32BIT + && TARGET_SOFT_FLOAT + && (!MEM_P (operands[0]) + || register_operand (operands[1], SFmode))" + "@ + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "mov_reg,load1,store1") + (set_attr "arm_pool_range" "*,4096,*") + (set_attr "thumb2_pool_range" "*,4094,*") + (set_attr "arm_neg_pool_range" "*,4084,*") + (set_attr "thumb2_neg_pool_range" "*,0,*")] +) + +;;; ??? This should have alternatives for constants. +(define_insn "*thumb1_movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h") + (match_operand:SF 1 "general_operand" "l, >,l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + add\\t%0, %1, #0 + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alus_imm,load1,store1,load1,store1,mov_reg,mov_reg") + (set_attr "pool_range" "*,*,*,1018,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] +) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (DFmode, operands[1]); + } + else /* TARGET_THUMB */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (DFmode, operands[1]); + } + } + " +) + +;; Reloading a df mode value stored in integer regs to memory can require a +;; scratch reg. 
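Stepping back to the SF-constant split above (and the __fp16 case in *arm32_movhf): moving a floating-point constant into a core register is just a move of its IEEE-754 bit pattern as a 32-bit integer, which the usual integer-constant sequences can then build. A small C illustration of that reinterpretation, not compiler code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: the SImode "value" of an SFmode constant is its
   IEEE-754 single-precision bit pattern.  */
int main (void)
{
  float f = 1.5f;
  uint32_t bits;

  memcpy (&bits, &f, sizeof bits);      /* 1.5f -> 0x3fc00000 */
  printf ("SF constant %g moves as SImode 0x%08x\n", (double) f, bits);
  return 0;
}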
+(define_expand "reload_outdf" + [(match_operand:DF 0 "arm_reload_memory_operand" "=o") + (match_operand:DF 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "=&r")] + "TARGET_THUMB2" + " + { + enum rtx_code code = GET_CODE (XEXP (operands[0], 0)); + + if (code == REG) + operands[2] = XEXP (operands[0], 0); + else if (code == POST_INC || code == PRE_DEC) + { + operands[0] = gen_rtx_SUBREG (DImode, operands[0], 0); + operands[1] = gen_rtx_SUBREG (DImode, operands[1], 0); + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (code == PRE_INC) + { + rtx reg = XEXP (XEXP (operands[0], 0), 0); + + emit_insn (gen_addsi3 (reg, reg, GEN_INT (8))); + operands[2] = reg; + } + else if (code == POST_DEC) + operands[2] = XEXP (XEXP (operands[0], 0), 0); + else + emit_insn (gen_addsi3 (operands[2], XEXP (XEXP (operands[0], 0), 0), + XEXP (XEXP (operands[0], 0), 1))); + + emit_insn (gen_rtx_SET (VOIDmode, + replace_equiv_address (operands[0], operands[2]), + operands[1])); + + if (code == POST_DEC) + emit_insn (gen_addsi3 (operands[2], operands[2], GEN_INT (-8))); + + DONE; + }" +) + +(define_insn "*movdf_soft_insn" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m") + (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))] + "TARGET_32BIT && TARGET_SOFT_FLOAT + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands, true, NULL); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "multiple,multiple,multiple,load2,store2") + (set_attr "arm_pool_range" "*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*") + (set_attr "arm_neg_pool_range" "*,*,*,1004,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdi_insn pattern. +;;; ??? The 'F' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. +(define_insn "*thumb_movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=l,l,>,l, m,*r") + (match_operand:DF 1 "general_operand" "l, >,l,mF,l,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"ldmia\\t%1, {%0, %H0}\"; + case 2: + return \"stmia\\t%0, {%1, %H1}\"; + case 3: + return thumb_load_double_from_address (operands); + case 4: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (Pmode, + XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 5: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + " + [(set_attr "length" "4,2,2,6,4,4") + (set_attr "type" "multiple,load2,store2,load2,store2,multiple") + (set_attr "pool_range" "*,*,*,1018,*,*")] +) + + +;; load- and store-multiple insns +;; The arm can load/store any set of registers, provided that they are in +;; ascending order, but these expanders assume a contiguous set. 
+ +(define_expand "load_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || !MEM_P (operands[1]) + || !REG_P (operands[0]) + || REGNO (operands[0]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[0]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_load_multiple (arm_regs_in_sequence + REGNO (operands[0]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[1], 0)), + FALSE, operands[1], &offset); +}) + +(define_expand "store_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || !REG_P (operands[1]) + || !MEM_P (operands[0]) + || REGNO (operands[1]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[1]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_store_multiple (arm_regs_in_sequence + REGNO (operands[1]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[0], 0)), + FALSE, operands[0], &offset); +}) + + +;; Move a block of memory if it is word aligned and MORE than 2 words long. +;; We could let this apply for blocks of less than this, but it clobbers so +;; many registers that there is then probably a better way. + +(define_expand "movmemqi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + " + if (TARGET_32BIT) + { + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (gen_movmem_ldrd_strd (operands)) + DONE; + FAIL; + } + + if (arm_gen_movmemqi (operands)) + DONE; + FAIL; + } + else /* TARGET_THUMB1 */ + { + if ( INTVAL (operands[3]) != 4 + || INTVAL (operands[2]) > 48) + FAIL; + + thumb_expand_movmemqi (operands); + DONE; + } + " +) + +;; Thumb block-move insns + +(define_insn "movmem12b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (mem:SI (plus:SI (match_dup 3) (const_int 8)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 12))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 12))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l")) + (clobber (match_scratch:SI 6 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (3, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... 
It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store3")] +) + +(define_insn "movmem8b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 8))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 8))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (2, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store2")] +) + + + +;; Compare & branch insns +;; The range calculations are based as follows: +;; For forward branches, the address calculation returns the address of +;; the next instruction. This is 2 beyond the branch instruction. +;; For backward branches, the address calculation returns the address of +;; the first instruction in this pattern (cmp). This is 2 before the branch +;; instruction for the shortest sequence, and 4 before the branch instruction +;; if we have to jump around an unconditional branch. +;; To the basic branch range the PC offset must be added (this is +4). +;; So for forward branches we have +;; (pos_range - pos_base_offs + pc_offs) = (pos_range - 2 + 4). +;; And for backward branches we have +;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). +;; +;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048). +;; For a 'b' pos_range = 254, neg_range = -256 giving (-250 ->256). + +(define_expand "cbranchsi4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_EITHER" + " + if (!TARGET_THUMB1) + { + if (!arm_validize_comparison (&operands[0], &operands[1], &operands[2])) + FAIL; + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + if (thumb1_cmpneg_operand (operands[2], SImode)) + { + emit_jump_insn (gen_cbranchsi4_scratch (NULL, operands[1], operands[2], + operands[3], operands[0])); + DONE; + } + if (!thumb1_cmp_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + ") + +;; A pattern to recognize a special situation and optimize for it. +;; On the thumb, zero-extension from memory is preferrable to sign-extension +;; due to the available addressing modes. Hence, convert a signed comparison +;; with zero into an unsigned comparison with 127 if possible. +(define_expand "cbranchqi4" + [(set (pc) (if_then_else + (match_operator 0 "lt_ge_comparison_operator" + [(match_operand:QI 1 "memory_operand" "") + (match_operand:QI 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx xops[4]; + xops[1] = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendqisi2 (xops[1], operands[1])); + xops[2] = GEN_INT (127); + xops[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]) == GE ? 
LEU : GTU, + VOIDmode, xops[1], xops[2]); + xops[3] = operands[3]; + emit_insn (gen_cbranchsi4 (xops[0], xops[1], xops[2], xops[3])); + DONE; +}) + +(define_expand "cbranchsf4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdf4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdi4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "{ + if (!arm_validize_comparison (&operands[0], &operands[1], &operands[2])) + FAIL; + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_insn "cbranchsi4_insn" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,l*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx t = cfun->machine->thumb1_cc_insn; + if (t != NULL_RTX) + { + if (!rtx_equal_p (cfun->machine->thumb1_cc_op0, operands[1]) + || !rtx_equal_p (cfun->machine->thumb1_cc_op1, operands[2])) + t = NULL_RTX; + if (cfun->machine->thumb1_cc_mode == CC_NOOVmode) + { + if (!noov_comparison_operator (operands[0], VOIDmode)) + t = NULL_RTX; + } + else if (cfun->machine->thumb1_cc_mode != CCmode) + t = NULL_RTX; + } + if (t == NULL_RTX) + { + output_asm_insn ("cmp\t%1, %2", operands); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = operands[1]; + cfun->machine->thumb1_cc_op1 = operands[2]; + cfun->machine->thumb1_cc_mode = CCmode; + } + else + /* Ensure we emit the right type of condition code on the jump. 
*/ + XEXP (operands[0], 0) = gen_rtx_REG (cfun->machine->thumb1_cc_mode, + CC_REGNUM); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } +} + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "cbranchsi4_scratch" + [(set (pc) (if_then_else + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,0") + (match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=l,l"))] + "TARGET_THUMB1" + "* + output_asm_insn (\"add\\t%0, %1, #%n2\", operands); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d4\\t%l3\"; + case 6: return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*negated_cbranchsi4" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(match_operand:SI 1 "s_register_operand" "l") + (neg:SI (match_operand:SI 2 "s_register_operand" "l"))]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + output_asm_insn (\"cmn\\t%1, %2\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tbit_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "i")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - 1 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return 
\"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tlobits_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (match_operand:SI 2 "const_int_operand" "i") + (const_int 0)) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tstsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 3 "equality_operator" + [(and:SI (match_operand:SI 0 "s_register_operand" "%l") + (match_operand:SI 1 "s_register_operand" "l")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + { + output_asm_insn (\"tst\\t%0, %1\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d3\\t%l2\"; + case 6: return \"b%D3\\t.LCB%=\;b\\t%l2\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D3\\t.LCB%=\;bl\\t%l2\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) + (le (minus (match_dup 2) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*cbranchne_decr1" + [(set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_operand:SI 2 "s_register_operand" "l,l,1,l") + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,*?h,*?m,*?m") + (plus:SI (match_dup 2) (const_int -1))) + (clobber (match_scratch:SI 1 "=X,l,&l,&l"))] + "TARGET_THUMB1" + "* + { + rtx cond[2]; + cond[0] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? 
GEU : LTU), + VOIDmode, operands[2], const1_rtx); + cond[1] = operands[4]; + + if (which_alternative == 0) + output_asm_insn (\"sub\\t%0, %2, #1\", operands); + else if (which_alternative == 1) + { + /* We must provide an alternative for a hi reg because reload + cannot handle output reloads on a jump instruction, but we + can't subtract into that. Fortunately a mov from lo to hi + does not clobber the condition codes. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"mov\\t%0, %1\", operands); + } + else + { + /* Similarly, but the target is memory. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"str\\t%1, %0\", operands); + } + + switch (get_attr_length (insn) - (which_alternative ? 2 : 0)) + { + case 4: + output_asm_insn (\"b%d0\\t%l1\", cond); + return \"\"; + case 6: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (eq (symbol_ref ("which_alternative")) + (const_int 0)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + ;; Alternative 1 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 2 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 3 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10)))]) + (set_attr "type" "multiple")] +) + +(define_insn "*addsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 4 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 2 "s_register_operand" "%0,l,*l,1,1,1") + (match_operand:SI 3 "reg_or_int_operand" "IJ,lL,*l,lIJ,lIJ,lIJ")) + (const_int 0)]) + (label_ref (match_operand 5 "" "")) + (pc))) + (set + (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,l,*!h,*?h,*?m,*?m") + (plus:SI (match_dup 2) (match_dup 3))) + (clobber (match_scratch:SI 1 "=X,X,l,l,&l,&l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[4]) == EQ + || GET_CODE (operands[4]) == NE + || GET_CODE (operands[4]) == GE + || GET_CODE (operands[4]) == LT)" + "* + { + rtx cond[3]; + + cond[0] = (which_alternative < 2) ? 
operands[0] : operands[1]; + cond[1] = operands[2]; + cond[2] = operands[3]; + + if (CONST_INT_P (cond[2]) && INTVAL (cond[2]) < 0) + output_asm_insn (\"sub\\t%0, %1, #%n2\", cond); + else + output_asm_insn (\"add\\t%0, %1, %2\", cond); + + if (which_alternative >= 2 + && which_alternative < 4) + output_asm_insn (\"mov\\t%0, %1\", operands); + else if (which_alternative >= 4) + output_asm_insn (\"str\\t%1, %0\", operands); + + switch (get_attr_length (insn) - ((which_alternative >= 2) ? 2 : 0)) + { + case 4: + return \"b%d4\\t%l5\"; + case 6: + return \"b%D4\\t.LCB%=\;b\\t%l5\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D4\\t.LCB%=\;bl\\t%l5\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -250)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2040)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -248)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2038)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))))) + (set_attr "type" "multiple")] +) + +(define_insn "*addsi3_cbranch_scratch" + [(set (pc) + (if_then_else + (match_operator 3 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 1 "s_register_operand" "%l,l,l,0") + (match_operand:SI 2 "reg_or_int_operand" "J,l,L,IJ")) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=X,X,l,l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[3]) == EQ + || GET_CODE (operands[3]) == NE + || GET_CODE (operands[3]) == GE + || GET_CODE (operands[3]) == LT)" + "* + { + switch (which_alternative) + { + case 0: + output_asm_insn (\"cmp\t%1, #%n2\", operands); + break; + case 1: + output_asm_insn (\"cmn\t%1, %2\", operands); + break; + case 2: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %1, %2\", operands); + else + output_asm_insn (\"add\t%0, %1, %2\", operands); + break; + case 3: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %0, %2\", operands); + else + output_asm_insn (\"add\t%0, %0, %2\", operands); + break; + } + + switch (get_attr_length (insn)) + { + case 4: + return \"b%d3\\t%l4\"; + case 6: + return \"b%D3\\t.LCB%=\;b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D3\\t.LCB%=\;bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + + +;; Comparison and test insns + +(define_insn "*arm_cmpsi_insn" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" 
"Py,r,r,I,L")))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmn%?\\t%0, #%n1" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,any,any,any") + (set_attr "length" "2,2,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,no,no") + (set_attr "type" "alus_imm,alus_reg,alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*cmpsi_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")])))] + "TARGET_32BIT" + "cmp\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) + +(define_insn "*cmpsi_shiftsi_swp" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")]) + (match_operand:SI 0 "s_register_operand" "r,r,r")))] + "TARGET_32BIT" + "cmp%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) + +(define_insn "*arm_cmpsi_negshiftsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operator:SI 1 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "reg_or_int_operand" "rM")])) + (match_operand:SI 0 "s_register_operand" "r")))] + "TARGET_ARM" + "cmn%?\\t%0, %2%S1" + [(set_attr "conds" "set") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alus_shift_imm") + (const_string "alus_shift_reg"))) + (set_attr "predicable" "yes")] +) + +;; DImode comparisons. The generic code generates branches that +;; if-conversion can not reduce to a conditional compare, so we do +;; that directly. 
+ +(define_insn_and_split "*arm_cmpdi_insn" + [(set (reg:CC_NCV CC_REGNUM) + (compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi"))) + (clobber (match_scratch:SI 2 "=r"))] + "TARGET_32BIT" + "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))) + (parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 3) (match_dup 4))) + (set (match_dup 2) + (minus:SI (match_dup 5) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + { + operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode, + DImode, + operands[1]))); + operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]); + } + else + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]); + } + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + } + [(set_attr "conds" "set") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_cmpdi_unsigned" + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + + "TARGET_32BIT" + "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + operands[3] = gen_highpart_mode (SImode, DImode, operands[1]); + else + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "set") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "6,6,8") + (set_attr "type" "multiple")] +) + +(define_insn "*arm_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "r") + (const_int 0))) + (clobber (match_scratch:SI 1 "=r"))] + "TARGET_32BIT" + "orr%.\\t%1, %Q0, %R0" + [(set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "*thumb_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "l") + (const_int 0))) + (clobber (match_scratch:SI 1 "=l"))] + "TARGET_THUMB1" + "orr\\t%1, %Q0, %R0" + [(set_attr "conds" "set") + (set_attr "length" "2") + (set_attr "type" "logics_reg")] +) + +; This insn allows redundant compares to be removed by cse, nothing should +; ever appear in the output file since (set (reg x) (reg x)) is a no-op that +; is deleted later on. The match_dup will match the mode here, so that +; mode changes of the condition codes aren't lost by this even though we don't +; specify what they are. 
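+; For example, CSE can leave a redundant compare behind as
+; (set (reg:CC CC_REGNUM) (reg:CC CC_REGNUM)); that matches this pattern
+; and is deleted as a no-op later on.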
+ +(define_insn "*deleted_compare" + [(set (match_operand 0 "cc_register" "") (match_dup 0))] + "TARGET_32BIT" + "\\t%@ deleted compare" + [(set_attr "conds" "set") + (set_attr "length" "0") + (set_attr "type" "no_insn")] +) + + +;; Conditional branch insns + +(define_expand "cbranch_cc" + [(set (pc) + (if_then_else (match_operator 0 "" [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "operands[1] = arm_gen_compare_reg (GET_CODE (operands[0]), + operands[1], operands[2], NULL_RTX); + operands[2] = const0_rtx;" +) + +;; +;; Patterns to match conditional branch insns. +;; + +(define_insn "arm_cond_branch" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%d1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256)))) + (const_int 2) + (const_int 4)))] +) + +(define_insn "*arm_cond_branch_reversed" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%D1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256)))) + (const_int 2) + (const_int 4)))] +) + + + +; scc insns + +(define_expand "cstore_cc" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "" [(match_operand 2 "" "") + (match_operand 3 "" "")]))] + "TARGET_32BIT" + "operands[2] = arm_gen_compare_reg (GET_CODE (operands[1]), + operands[2], operands[3], NULL_RTX); + operands[3] = const0_rtx;" +) + +(define_insn_and_split "*mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) 
+ (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" + "{ + rtx op3, scratch, scratch2; + + if (!TARGET_THUMB1) + { + if (!arm_add_operand (operands[3], SImode)) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (operands[3] == const0_rtx) + { + switch (GET_CODE (operands[1])) + { + case EQ: + emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], operands[2])); + break; + + case NE: + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], operands[2])); + break; + + case LE: + scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx, + NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, ior_optab, operands[2], scratch, + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + operands[0], 1, OPTAB_WIDEN); + break; + + case GE: + scratch = expand_unop (SImode, one_cmpl_optab, operands[2], + NULL_RTX, 1); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + break; + + case GT: + scratch = expand_binop (SImode, ashr_optab, operands[2], + GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, sub_optab, scratch, operands[2], + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0], + 0, OPTAB_WIDEN); + break; + + /* LT is handled by generic code. No need for unsigned with 0. 
*/ + default: + FAIL; + } + DONE; + } + + switch (GET_CODE (operands[1])) + { + case EQ: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], scratch)); + break; + + case NE: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], scratch)); + break; + + case LE: + op3 = force_reg (SImode, operands[3]); + + scratch = expand_binop (SImode, lshr_optab, operands[2], GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, ashr_optab, op3, GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + op3, operands[2])); + break; + + case GE: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + operands[2], op3)); + break; + + case LEU: + op3 = force_reg (SImode, operands[3]); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + op3, operands[2])); + break; + + case GEU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + operands[2], op3)); + break; + + case LTU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3)); + break; + + case GTU: + op3 = force_reg (SImode, operands[3]); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], op3, operands[2])); + break; + + /* No good sequences for GT, LT. 
*/ + default: + FAIL; + } + DONE; +}") + +(define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DI 2 "s_register_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" + "{ + if (!arm_validize_comparison (&operands[1], + &operands[2], + &operands[3])) + FAIL; + emit_insn (gen_cstore_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_expand "cstoresi_eq0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_expand "cstoresi_ne0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_insn "*cstoresi_eq0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=&l,l") + (eq:SI (match_operand:SI 1 "s_register_operand" "l,0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=X,l"))] + "TARGET_THUMB1" + "@ + neg\\t%0, %1\;adc\\t%0, %0, %1 + neg\\t%2, %1\;adc\\t%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn "*cstoresi_ne0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (ne:SI (match_operand:SI 1 "s_register_operand" "0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=l"))] + "TARGET_THUMB1" + "sub\\t%2, %1, #1\;sbc\\t%0, %1, %2" + [(set_attr "length" "4")] +) + +;; Used as part of the expansion of thumb ltu and gtu sequences +(define_insn "cstoresi_nltu_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))] + "TARGET_THUMB1" + "cmp\\t%1, %2\;sbc\\t%0, %0, %0" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "cstoresi_ltu_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1" + [(set (match_dup 3) + (neg:SI (ltu:SI (match_dup 1) (match_dup 2)))) + (set (match_dup 0) (neg:SI (match_dup 3)))] + "operands[3] = gen_reg_rtx (SImode);" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +;; Used as part of the expansion of thumb les sequence. 
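+;; Sketch of the insn below: it computes %0 = %1 + %2 + (%3 >=u %4) with two
+;; Thumb-1 instructions ("cmp %3, %4" sets the carry flag when %3 >= %4
+;; unsigned, then "adc %0, %1, %2" adds that carry back in).  The cstoresi4
+;; expander above feeds it shifted sign bits for the signed LE/GE stores and
+;; a zeroed scratch register for the unsigned LEU/GEU stores.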
+(define_insn "thumb1_addsi3_addgeu" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0") + (match_operand:SI 2 "s_register_operand" "l")) + (geu:SI (match_operand:SI 3 "s_register_operand" "l") + (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))] + "TARGET_THUMB1" + "cmp\\t%3, %4\;adc\\t%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + + +;; Conditional move insns + +(define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" + " + { + enum rtx_code code; + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") + (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") + (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_insn "*cmov" + [(set (match_operand:SDF 0 "s_register_operand" "=") + (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SDF 3 "s_register_operand" + "") + (match_operand:SDF 4 "s_register_operand" + "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "* + { + enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); + switch (code) + { + case ARM_GE: + case ARM_GT: + case ARM_EQ: + case ARM_VS: + return \"vsel%d1.\\t%0, %3, %4\"; + case ARM_LT: + case ARM_LE: + case ARM_NE: + case ARM_VC: + return \"vsel%D1.\\t%0, %4, %3\"; + default: + gcc_unreachable (); + } + return \"\"; + }" + [(set_attr "conds" "use") + (set_attr "type" "f_sel")] +) + +(define_insn_and_split "*movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") + (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + 
"TARGET_ARM" + "@ + mov%D3\\t%0, %2 + mvn%D3\\t%0, #%B2 + mov%d3\\t%0, %1 + mvn%d3\\t%0, #%B1 + # + # + # + #" + ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + operands[4], + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } + [(set_attr "length" "4,4,4,4,8,8,8,8") + (set_attr "conds" "use") + (set_attr_alternative "type" + [(if_then_else (match_operand 2 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (if_then_else (match_operand 1 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg")])] +) + +(define_insn "*movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_ARM && TARGET_SOFT_FLOAT" + "@ + mov%D3\\t%0, %2 + mov%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "type" "mov_reg")] +) + + +;; Jump and linkage insns + +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_32BIT" + "* + { + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%?\\t%l0\"; + } + " + [(set_attr "predicable" "yes") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) + (le (minus (match_dup 0) (pc)) (const_int 2048)))) + (const_int 2) + (const_int 4))) + (set_attr "type" "branch")] +) + +(define_insn "*thumb_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_THUMB1" + "* + if (get_attr_length (insn) == 2) + return \"b\\t%l0\"; + return \"bl\\t%l0\\t%@ far jump\"; + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "4") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) + (le (minus (match_dup 0) (pc)) (const_int 2048))) + (const_int 2) + (const_int 4))) + (set_attr "type" "branch")] +) + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx callee, pat; + + /* In an untyped call, we can get NULL for operand 2. 
*/ + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[0], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + + pat = gen_call_internal (operands[0], operands[1], operands[2]); + arm_emit_call_insn (pat, XEXP (operands[0], 0)); + DONE; + }" +) + +(define_expand "call_internal" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_reg_armv5" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_reg_arm" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call (operands); + " + ;; length is worst case, normally it is only two + [(set_attr "length" "12") + (set_attr "type" "call")] +) + + +;; Note: not used for armv5+ because the sequence used (ldr pc, ...) is not +;; considered a function call by the branch predictor of some cores (PR40887). +;; Falls back to blx rN (*call_reg_armv5). + +(define_insn "*call_mem" + [(call (mem:SI (match_operand:SI 0 "call_memory_operand" "m")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call_mem (operands); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1_v5" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx\\t%0" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[0]); + else if (operands[1] == const0_rtx) + return \"bl\\t%__interwork_call_via_%0\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%0\"; + else + return \"bl\\t%__interwork_r11_call_via_%0\"; + }" + [(set_attr "type" "call")] +) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx pat, callee; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[3] == 0) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? 
arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + + pat = gen_call_value_internal (operands[0], operands[1], + operands[2], operands[3]); + arm_emit_call_insn (pat, XEXP (operands[1], 0)); + DONE; + }" +) + +(define_expand "call_value_internal" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_value_reg_armv5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx%?\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_arm" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +;; Note: see *call_mem + +(define_insn "*call_value_mem" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "call_memory_operand" "m")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0))) + && !SIBLING_CALL_P (insn)" + "* + return output_call_mem (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1_v5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5" + "blx\\t%1" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[1]); + else if (operands[2] == const0_rtx) + return \"bl\\t%__interwork_call_via_%1\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%1\"; + else + return \"bl\\t%__interwork_r11_call_via_%1\"; + }" + [(set_attr "type" "call")] +) + +;; Allow calls to SYMBOL_REFs specially as they are not valid general addresses +;; The 'a' causes the operand to be treated as an address, i.e. no '#' output. + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && !SIBLING_CALL_P (insn) + && (GET_CODE (operands[0]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "* + { + return NEED_PLT_RELOC ? 
\"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "" "")) + (match_operand:SI 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && !SIBLING_CALL_P (insn) + && (GET_CODE (operands[1]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { + return NEED_PLT_RELOC ? \"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand:SI 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[0]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "bl\\t%a0" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[1]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "bl\\t%a1" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +;; We may also be able to do sibcalls for Thumb, but it's much harder... +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])] + "TARGET_32BIT" + " + { + if (!REG_P (XEXP (operands[0], 0)) + && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) + XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); + + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])] + "TARGET_32BIT" + " + { + if (!REG_P (XEXP (operands[1], 0)) && + (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) + XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); + + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs, US")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] + "TARGET_32BIT && SIBLING_CALL_P (insn)" + "* + if (which_alternative == 1) + return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%0\\t%@ indirect register sibling call\"; + else + return \"mov%?\\t%|pc, %0\\t%@ indirect register sibling call\"; + } + " + [(set_attr "type" "call")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,US")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] + "TARGET_32BIT && SIBLING_CALL_P (insn)" + "* + if (which_alternative == 1) + return NEED_PLT_RELOC ? 
\"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%1\"; + else + return \"mov%?\\t%|pc, %1\\t@ indirect sibling call \"; + } + " + [(set_attr "type" "call")] +) + +(define_expand "return" + [(returns)] + "(TARGET_ARM || (TARGET_THUMB2 + && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL + && !IS_STACKALIGN (arm_current_func_type ()))) + " + " + { + if (TARGET_THUMB2) + { + thumb2_expand_return (); + DONE; + } + } + " +) + +;; Often the return insn will be the same as loading from memory, so set attr +(define_insn "*arm_return" + [(return)] + "TARGET_ARM && USE_RETURN_INSN (FALSE)" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (const_true_rtx, true, false, false); + }" + [(set_attr "type" "load1") + (set_attr "length" "12") + (set_attr "predicable" "yes")] +) + +(define_insn "*cond_return" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (returns) + (pc)))] + "TARGET_ARM " + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], true, false, + ); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +(define_insn "*cond_return_inverted" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) + (returns)))] + "TARGET_ARM " + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], true, true, + ); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +(define_insn "*arm_simple_return" + [(simple_return)] + "TARGET_ARM" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (const_true_rtx, true, false, true); + }" + [(set_attr "type" "branch") + (set_attr "length" "4") + (set_attr "predicable" "yes")] +) + +;; Generate a sequence of instructions to determine if the processor is +;; in 26-bit or 32-bit mode, and return the appropriate return address +;; mask. + +(define_expand "return_addr_mask" + [(set (match_dup 1) + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (eq (match_dup 1) (const_int 0)) + (const_int -1) + (const_int 67108860)))] ; 0x03fffffc + "TARGET_ARM" + " + operands[1] = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + ") + +(define_insn "*check_arch2" + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0)))] + "TARGET_ARM" + "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc" + [(set_attr "length" "8") + (set_attr "conds" "set") + (set_attr "type" "multiple")] +) + +;; Call subroutine returning any type. 
+ +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "TARGET_EITHER" + " + { + int i; + rtx par = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (XVECLEN (operands[2], 0))); + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[1], 0)); + mem = change_address (operands[1], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx src = SET_SRC (XVECEXP (operands[2], 0, i)); + + /* Default code only uses r0 as a return value, but we could + be using anything up to 4 registers. */ + if (REGNO (src) == R0_REGNUM) + src = gen_rtx_REG (TImode, R0_REGNUM); + + XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src, + GEN_INT (size)); + size += GET_MODE_SIZE (GET_MODE (src)); + } + + emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL, + const0_rtx)); + + size = 0; + + for (i = 0; i < XVECLEN (par, 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = XEXP (XVECEXP (par, 0, i), 0); + + if (size != 0) + emit_move_insn (addr, plus_constant (Pmode, addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (mem, reg); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; + }" +) + +(define_expand "untyped_return" + [(match_operand:BLK 0 "memory_operand" "") + (match_operand 1 "" "")] + "TARGET_EITHER" + " + { + int i; + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[0], 0)); + mem = change_address (operands[0], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[1], 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = SET_DEST (XVECEXP (operands[1], 0, i)); + + if (size != 0) + emit_move_insn (addr, plus_constant (Pmode, addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (reg, mem); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* Emit USE insns before the return. */ + for (i = 0; i < XVECLEN (operands[1], 0); i++) + emit_use (SET_DEST (XVECEXP (operands[1], 0, i))); + + /* Construct the return. */ + expand_naked_return (); + + DONE; + }" +) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. 
+ +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] VUNSPEC_BLOCKAGE)] + "TARGET_EITHER" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] +) + +(define_expand "casesi" + [(match_operand:SI 0 "s_register_operand" "") ; index to jump on + (match_operand:SI 1 "const_int_operand" "") ; lower bound + (match_operand:SI 2 "const_int_operand" "") ; total range + (match_operand:SI 3 "" "") ; table label + (match_operand:SI 4 "" "")] ; Out of range label + "TARGET_32BIT || optimize_size || flag_pic" + " + { + enum insn_code code; + if (operands[1] != const0_rtx) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg, operands[0], + gen_int_mode (-INTVAL (operands[1]), + SImode))); + operands[0] = reg; + } + + if (TARGET_ARM) + code = CODE_FOR_arm_casesi_internal; + else if (TARGET_THUMB1) + code = CODE_FOR_thumb1_casesi_internal_pic; + else if (flag_pic) + code = CODE_FOR_thumb2_casesi_internal_pic; + else + code = CODE_FOR_thumb2_casesi_internal; + + if (!insn_data[(int) code].operand[1].predicate(operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + emit_jump_insn (GEN_FCN ((int) code) (operands[0], operands[2], + operands[3], operands[4])); + DONE; + }" +) + +;; The USE in this pattern is needed to tell flow analysis that this is +;; a CASESI insn. It has no other purpose. +(define_insn "arm_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (use (label_ref (match_dup 2)))])] + "TARGET_ARM" + "* + if (flag_pic) + return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\"; + return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_expand "thumb1_casesi_internal_pic" + [(match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 1 "thumb1_cmp_operand" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + "TARGET_THUMB1" + { + rtx reg0; + rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[1]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[1], + operands[3])); + reg0 = gen_rtx_REG (SImode, 0); + emit_move_insn (reg0, operands[0]); + emit_jump_insn (gen_thumb1_casesi_dispatch (operands[2]/*, operands[3]*/)); + DONE; + } +) + +(define_insn "thumb1_casesi_dispatch" + [(parallel [(set (pc) (unspec [(reg:SI 0) + (label_ref (match_operand 0 "" "")) +;; (label_ref (match_operand 1 "" "")) +] + UNSPEC_THUMB1_CASESI)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_THUMB1" + "* return thumb1_output_casesi(operands);" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" ""))] + "TARGET_EITHER" + " + /* Thumb-2 doesn't have mov pc, reg. Explicitly set the low bit of the + address and use bx. */ + if (TARGET_THUMB2) + { + rtx tmp; + tmp = gen_reg_rtx (SImode); + emit_insn (gen_iorsi3 (tmp, operands[0], GEN_INT(1))); + operands[0] = tmp; + } + " +) + +;; NB Never uses BX. 
+(define_insn "*arm_indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" "r"))] + "TARGET_ARM" + "mov%?\\t%|pc, %0\\t%@ indirect register jump" + [(set_attr "predicable" "yes") + (set_attr "type" "branch")] +) + +(define_insn "*load_indirect_jump" + [(set (pc) + (match_operand:SI 0 "memory_operand" "m"))] + "TARGET_ARM" + "ldr%?\\t%|pc, %0\\t%@ indirect memory jump" + [(set_attr "type" "load1") + (set_attr "pool_range" "4096") + (set_attr "neg_pool_range" "4084") + (set_attr "predicable" "yes")] +) + +;; NB Never uses BX. +(define_insn "*thumb1_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB1" + "mov\\tpc, %0" + [(set_attr "conds" "clob") + (set_attr "length" "2") + (set_attr "type" "branch")] +) + + +;; Misc insns + +(define_insn "nop" + [(const_int 0)] + "TARGET_EITHER" + "* + if (TARGET_UNIFIED_ASM) + return \"nop\"; + if (TARGET_ARM) + return \"mov%?\\t%|r0, %|r0\\t%@ nop\"; + return \"mov\\tr8, r8\"; + " + [(set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 2) + (const_int 4))) + (set_attr "type" "mov_reg")] +) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "* + if (TARGET_ARM) + return \".inst\\t0xe7f000f0\"; + else + return \".inst\\t0xdeff\"; + " + [(set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 2) + (const_int 4))) + (set_attr "type" "trap") + (set_attr "conds" "unconditional")] +) + + +;; Patterns to allow combination of arithmetic, cond code and shifts + +(define_insn "*arith_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r,r") + (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) + (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] + "TARGET_32BIT" + "%i1%?\\t%0, %2, %4%S3" + [(set_attr "predicable" "yes") + (set_attr "shift" "4") + (set_attr "arch" "a,t2,t2,a") + ;; Thumb2 doesn't allow the stack pointer to be used for + ;; operand1 for all operations other than add and sub. In this case + ;; the minus operation is a candidate for an rsub and hence needs + ;; to be disabled. + ;; We have to make sure to disable the fourth alternative if + ;; the shift_operator is MULT, since otherwise the insn will + ;; also match a multiply_accumulate pattern and validate_change + ;; will allow a replacement of the constant with a register + ;; despite the checks done in shift_operator. 
+ (set_attr_alternative "insn_enabled" + [(const_string "yes") + (if_then_else + (match_operand:SI 1 "add_operator" "") + (const_string "yes") (const_string "no")) + (const_string "yes") + (if_then_else + (match_operand:SI 3 "mult_operator" "") + (const_string "no") (const_string "yes"))]) + (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 2 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "") + (match_operand:SI 5 "reg_or_int_operand" "")]) + (match_operand:SI 6 "s_register_operand" "")]) + (match_operand:SI 7 "arm_rhs_operand" "")])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 8) + (match_op_dup 2 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 6)])) + (set (match_dup 0) + (match_op_dup 1 [(match_dup 8) (match_dup 7)]))] + "") + +(define_insn "*arith_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_op_dup 1 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 2)]))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*arith_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*sub_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r")])))] + "TARGET_32BIT" + "sub%?\\t%0, %1, %3%S2" + [(set_attr "predicable" "yes") + (set_attr "shift" "3") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*sub_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) + (match_op_dup 2 [(match_dup 3) (match_dup 4)])))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + +(define_insn "*sub_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 
"s_register_operand" "r,r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r,r"))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + +(define_insn_and_split "*and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1" + "&& reload_completed" + [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 4) (set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set_attr "length" "8")] +) + +(define_insn_and_split "*ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] + "TARGET_ARM" + "@ + orr%d1\\t%0, %3, #1 + #" + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + ;; && which_alternative == 1 + ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "logic_imm,multiple")] +) + +; A series of splitters for the compare_scc pattern below. Note that +; order is important. 
+(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (lt:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "arm_arch5 && TARGET_32BIT" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 1) (match_dup 1))) + (set (match_dup 0) + (minus:SI (const_int 1) (match_dup 1)))]) + (cond_exec (ltu:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 0)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))]) + (cond_exec (ne:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] +{ + operands[3] = GEN_INT (-INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_add_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (minus:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (cond_exec (ne:CC_NOOV (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))]) + +(define_insn_and_split "*compare_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 5) (set (match_dup 0) (const_int 1)))] +{ + rtx tmp1; + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); +} + [(set_attr "type" "multiple")] +) + +;; Attempt to improve the sequence generated by the compare_scc splitters +;; not to use 
conditional execution. + +;; Rd = (eq (reg1) (const_int0)) // ARMv5 +;; clz Rd, reg1 +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +;; Rd = (eq (reg1) (const_int0)) // !ARMv5 +;; negs Rd, reg1 +;; adc Rd, Rd, reg1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 2 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) + +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 and optimising for speed. +;; sub Rd, Reg1, reg2 +;; clz Rd, Rd +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM) + && !(TARGET_THUMB2 && optimize_insn_for_size_p ())" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (clz:SI (match_dup 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + + +;; Rd = (eq (reg1) (reg2)) // ! ARMv5 or optimising for size. 
+;; sub T1, Reg1, reg2 +;; negs Rd, T1 +;; adc Rd, Rd, T1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 3 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 3))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] + " + if (CONST_INT_P (operands[2])) + operands[4] = plus_constant (SImode, operands[1], -INTVAL (operands[2])); + else + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[2]); + ") + +(define_insn "*cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_ARM" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d4\\t%0, %1\", operands); + return \"\"; + } + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "type" "mov_reg,mov_reg,multiple") + (set_attr "length" "4,4,8")] +) + +(define_insn "*cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + else if (GET_CODE (operands[5]) == MINUS) + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + else if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_ite0" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + 
(match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 0)) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char * const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmp%d5\\t%0, %1\", + \"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} + }; + static const char * const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", + \"cmp\\t%0, %1\"}, + {\"cmp\\t%2, %3\", + \"cmn\\t%0, #%n1\"}, + {\"cmn\\t%2, #%n3\", + \"cmp\\t%0, %1\"}, + {\"cmn\\t%2, #%n3\", + \"cmn\\t%0, #%n1\"} + }; + static const char * const ite[2] = + { + \"it\\t%d5\", + \"it\\t%d4\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr "type" "multiple") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))])] +) + +(define_insn "*cmp_ite1" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 1)) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char * const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%0, %1\", + \"cmp\\t%2, %3\"}, + {\"cmn\\t%0, #%n1\", + \"cmp\\t%2, %3\"}, + {\"cmp\\t%0, %1\", + \"cmn\\t%2, #%n3\"}, + {\"cmn\\t%0, #%n1\", + \"cmn\\t%2, #%n3\"} + }; + static const char * const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp%d4\\t%2, %3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmp%d4\\t%2, %3\", + \"cmn%D5\\t%0, #%n1\"}, + {\"cmn%d4\\t%2, #%n3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmn%d4\\t%2, #%n3\", + \"cmn%D5\\t%0, #%n1\"} + }; + static const char * const ite[2] = + { + \"it\\t%d4\", + \"it\\t%D5\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), + reverse_condition (GET_CODE (operands[4]))); + + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], 
operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_and" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (and:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char *const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmp%d5\\t%0, %1\", + \"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} + }; + static const char *const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", + \"cmp\\t%0, %1\"}, + {\"cmp\\t%2, %3\", + \"cmn\\t%0, #%n1\"}, + {\"cmn\\t%2, #%n3\", + \"cmp\\t%0, %1\"}, + {\"cmn\\t%2, #%n3\", + \"cmn\\t%0, #%n1\"} + }; + static const char *const ite[2] = + { + \"it\\t%d5\", + \"it\\t%d4\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "predicable" "no") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_ior" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (ior:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char *const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%0, %1\", + \"cmp\\t%2, %3\"}, + {\"cmn\\t%0, #%n1\", + \"cmp\\t%2, %3\"}, + {\"cmp\\t%0, %1\", + \"cmn\\t%2, #%n3\"}, + {\"cmn\\t%0, #%n1\", + \"cmn\\t%2, #%n3\"} + }; + static const char *const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp%D4\\t%2, %3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmp%D4\\t%2, 
%3\", + \"cmn%D5\\t%0, #%n1\"}, + {\"cmn%D4\\t%2, #%n3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmn%D4\\t%2, #%n3\", + \"cmn%D5\\t%0, #%n1\"} + }; + static const char *const ite[2] = + { + \"it\\t%D4\", + \"it\\t%D5\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + return \"\"; + } + " + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ior_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y) + != CCmode)" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 7) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_OR_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. 
+(define_insn_and_split "*ior_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=Ts") + (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*and_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + "#" + "TARGET_32BIT && reload_completed + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + [(set (match_dup 7) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_AND_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. +(define_insn_and_split "*and_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=Ts") + (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +;; If there is no dominance in the comparison, then we can still save an +;; instruction in the AND case, since we can know that the second compare +;; need only zero the value if false (if true, then the value is already +;; correct). 
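+;;
+;; Schematically, the split below produces (pseudo-assembly; the real
+;; condition codes and registers come from the matched operands):
+;;
+;;   <scc sequence>            @ Rd = first comparison, 0 or 1
+;;   cmp     a, b              @ second comparison
+;;   mov<inverse-cond> Rd, #0  @ zero Rd only if the second test failed
+;;
+;; which avoids materialising the second 0/1 value and AND-ing it in
+;; separately.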
+(define_insn_and_split "*and_scc_scc_nodom" + [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,0") + (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + == CCmode)" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))]) + (set (match_dup 7) (match_op_dup 8 [(match_dup 4) (match_dup 5)])) + (set (match_dup 0) + (if_then_else:SI (match_op_dup 6 [(match_dup 7) (const_int 0)]) + (match_dup 0) + (const_int 0)))] + "operands[7] = gen_rtx_REG (SELECT_CC_MODE (GET_CODE (operands[6]), + operands[4], operands[5]), + CC_REGNUM); + operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4], + operands[5]);" + [(set_attr "conds" "clob") + (set_attr "length" "20") + (set_attr "type" "multiple")] +) + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1)) + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")])) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1))) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") +;; ??? 
The conditional patterns above need checking for Thumb-2 usefulness + +(define_insn_and_split "*negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit mov\\t%0, %1, asr #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE) + { + /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */ + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + enum rtx_code rc = GET_CODE (operands[3]); + + rc = reverse_condition (rc); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + rc = GET_CODE (operands[3]); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + GEN_INT (~0)))); + DONE; + } + FAIL; + } + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "movcond_addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (if_then_else:SI + (match_operator 5 "comparison_operator" + [(plus:SI (match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "rI,rPy,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rPy,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_dup 3) + (match_dup 4)) + (const_int 0))) + (set (match_dup 0) (match_dup 1)) + (cond_exec (match_dup 6) + (set (match_dup 0) (match_dup 2)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), + operands[3], operands[4]); + enum rtx_code rc = GET_CODE (operands[5]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); + rc = reverse_condition (rc); + + operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + } + " + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "no,yes,yes") + (set_attr "type" "multiple")] +) + +(define_insn "movcond" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if 
(which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + if (CONST_INT_P (operands[4]) + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "multiple")] +) + +;; ??? The patterns below need checking for Thumb-2 usefulness. + +(define_insn "*ifcompare_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L")) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI")))] + "TARGET_ARM" + "@ + add%d4\\t%0, %2, %3 + sub%d4\\t%0, %2, #%n3 + add%d4\\t%0, %2, %3\;mov%D4\\t%0, %1 + sub%d4\\t%0, %2, #%n3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr_alternative "type" + [(if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_imm" ) + (const_string "alu_reg")) + (const_string "alu_imm") + (const_string "alu_reg") + (const_string "alu_reg")])] +) + +(define_insn "*ifcompare_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn 
"*if_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L"))))] + "TARGET_ARM" + "@ + add%D4\\t%0, %2, %3 + sub%D4\\t%0, %2, #%n3 + add%D4\\t%0, %2, %3\;mov%d4\\t%0, %1 + sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr "type" "alu_reg,alu_imm,multiple,multiple")] +) + +(define_insn "*ifcompare_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 9 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*ifcompare_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions. 
*/ + if (operands[3] == const0_rtx + && GET_CODE (operands[7]) != AND + && REG_P (operands[5]) + && REG_P (operands[1]) + && REGNO (operands[1]) == REGNO (operands[4]) + && REGNO (operands[4]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == LT) + return \"and\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + else if (GET_CODE (operands[6]) == GE) + return \"bic\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + } + if (CONST_INT_P (operands[3]) + && !const_ok_for_arm (INTVAL (operands[3]))) + output_asm_insn (\"cmn\\t%2, #%n3\", operands); + else + output_asm_insn (\"cmp\\t%2, %3\", operands); + output_asm_insn (\"%I7%d6\\t%0, %4, %5\", operands); + if (which_alternative != 0) + return \"mov%D6\\t%0, %1\"; + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))] + "TARGET_ARM" + "@ + %I5%d4\\t%0, %2, %3 + %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "alu_shift_reg,multiple")] +) + +(define_insn "*ifcompare_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions */ + if (operands[5] == const0_rtx + && GET_CODE (operands[7]) != AND + && REG_P (operands[3]) + && REG_P (operands[1]) + && REGNO (operands[1]) == REGNO (operands[2]) + && REGNO (operands[2]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == GE) + return \"and\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + else if (GET_CODE (operands[6]) == LT) + return \"bic\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + } + + if (CONST_INT_P (operands[5]) + && !const_ok_for_arm (INTVAL (operands[5]))) + output_asm_insn (\"cmn\\t%4, #%n5\", operands); + else + output_asm_insn (\"cmp\\t%4, %5\", operands); + + if (which_alternative != 0) + output_asm_insn (\"mov%d6\\t%0, %1\", operands); + return \"%I7%D6\\t%0, %2, %3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))] + "TARGET_ARM" + "@ + %I5%D4\\t%0, %2, %3 + %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" 
"alu_shift_reg,multiple")] +) + +(define_insn "*ifcompare_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 + mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg") + (set_attr "length" "4,8,8") + (set_attr "type" "mvn_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mvn%d4\\t%0, %2 + mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 + mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg,multiple,multiple") + (set_attr "length" "4,8,8")] +) + +(define_insn "*ifcompare_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mov%d5\\t%0, %2%S4 + mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4 + mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "type" "mov_shift_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_move_shift" + [(set (match_operand:SI 0 
"s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")])))] + "TARGET_ARM" + "@ + mov%D5\\t%0, %2%S4 + mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4 + mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "type" "mov_shift_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 7 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 9 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")])))] + "TARGET_ARM" + "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7" + [(set_attr "conds" "use") + (set_attr "shift" "1") + (set_attr "length" "8") + (set (attr "type") (if_then_else + (and (match_operand 2 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_string "mov_shift") + (const_string "mov_shift_reg")))] +) + +(define_insn "*ifcompare_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 1 
"s_register_operand" "r")) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_ARM" + "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + rsb%d4\\t%0, %2, #0 + mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0 + mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0" + [(set_attr "conds" "use") + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] +) + +(define_insn "*ifcompare_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + rsb%D4\\t%0, %2, #0 + mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0 + mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0" + [(set_attr "conds" "use") + 
(set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] +) + +(define_insn "*arith_adjacentmem" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "shiftable_operator" + [(match_operand:SI 2 "memory_operand" "m") + (match_operand:SI 3 "memory_operand" "m")])) + (clobber (match_scratch:SI 4 "=r"))] + "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])" + "* + { + rtx ldm[3]; + rtx arith[4]; + rtx base_reg; + HOST_WIDE_INT val1 = 0, val2 = 0; + + if (REGNO (operands[0]) > REGNO (operands[4])) + { + ldm[1] = operands[4]; + ldm[2] = operands[0]; + } + else + { + ldm[1] = operands[0]; + ldm[2] = operands[4]; + } + + base_reg = XEXP (operands[2], 0); + + if (!REG_P (base_reg)) + { + val1 = INTVAL (XEXP (base_reg, 1)); + base_reg = XEXP (base_reg, 0); + } + + if (!REG_P (XEXP (operands[3], 0))) + val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1)); + + arith[0] = operands[0]; + arith[3] = operands[1]; + + if (val1 < val2) + { + arith[1] = ldm[1]; + arith[2] = ldm[2]; + } + else + { + arith[1] = ldm[2]; + arith[2] = ldm[1]; + } + + ldm[0] = base_reg; + if (val1 !=0 && val2 != 0) + { + rtx ops[3]; + + if (val1 == 4 || val2 == 4) + /* Other val must be 8, since we know they are adjacent and neither + is zero. */ + output_asm_insn (\"ldm%(ib%)\\t%0, {%1, %2}\", ldm); + else if (const_ok_for_arm (val1) || const_ok_for_arm (-val1)) + { + ldm[0] = ops[0] = operands[4]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_add_immediate (ops); + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + else + { + /* Offset is out of range for a single add, so use two ldr. */ + ops[0] = ldm[1]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + ops[0] = ldm[2]; + ops[2] = GEN_INT (val2); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + } + } + else if (val1 != 0) + { + if (val1 < val2) + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + } + else + { + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + output_asm_insn (\"%I3%?\\t%0, %1, %2\", arith); + return \"\"; + }" + [(set_attr "length" "12") + (set_attr "predicable" "yes") + (set_attr "type" "load1")] +) + +; This pattern is never tried by combine, so do it as a peephole + +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "arm_general_register_operand" "")) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (const_int 0)))] + "TARGET_ARM" + [(parallel [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0)) + (neg:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 4 "arm_rhs_operand" "")])))) + (clobber (match_operand:SI 5 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 5) (not:SI (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) (and:SI (match_op_dup 2 [(match_dup 3) (match_dup 4)]) + (match_dup 5)))] + "" +) + +;; This split can be used because CC_Z mode implies that the following +;; branch will be an equality, or an unsigned inequality, so 
the sign +;; extension is not needed. + +(define_split + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (ashift:SI (subreg:SI (match_operand:QI 0 "memory_operand" "") 0) + (const_int 24)) + (match_operand 1 "const_int_operand" ""))) + (clobber (match_scratch:SI 2 ""))] + "TARGET_ARM + && (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) + == (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) >> 24) << 24)" + [(set (match_dup 2) (zero_extend:SI (match_dup 0))) + (set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 1)))] + " + operands[1] = GEN_INT (((unsigned long) INTVAL (operands[1])) >> 24); + " +) +;; ??? Check the patterns above for Thumb-2 usefulness + +(define_expand "prologue" + [(clobber (const_int 0))] + "TARGET_EITHER" + "if (TARGET_32BIT) + arm_expand_prologue (); + else + thumb1_expand_prologue (); + DONE; + " +) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "TARGET_EITHER" + " + if (crtl->calls_eh_return) + emit_insn (gen_force_register_use (gen_rtx_REG (Pmode, 2))); + if (TARGET_THUMB1) + { + thumb1_expand_epilogue (); + emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, + gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE)); + } + else if (HAVE_return) + { + /* HAVE_return is testing for USE_RETURN_INSN (FALSE). Hence, + no need for explicit testing again. */ + emit_jump_insn (gen_return ()); + } + else if (TARGET_32BIT) + { + arm_expand_epilogue (true); + } + DONE; + " +) + +(define_insn "prologue_thumb1_interwork" + [(unspec_volatile [(const_int 0)] VUNSPEC_THUMB1_INTERWORK)] + "TARGET_THUMB1" + "* return thumb1_output_interwork ();" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +;; Note - although unspec_volatiles USE all hard registers, +;; USEs are ignored after reload has completed. Thus we need +;; to add an unspec of the link register to ensure that flow +;; does not think that it is unused by the sibcall branch that +;; will replace the standard function epilogue. +(define_expand "sibcall_epilogue" + [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_REGISTER_USE) + (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])] + "TARGET_32BIT" + " + arm_expand_epilogue (false); + DONE; + " +) + +(define_insn "*epilogue_insns" + [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)] + "TARGET_THUMB1" + "* + return thumb1_unexpanded_epilogue (); + " + ; Length is absolute worst case + [(set_attr "length" "44") + (set_attr "type" "block") + ;; We don't clobber the conditions, but the potential length of this + ;; operation is sufficient to make conditionalizing the sequence + ;; unlikely to be profitable. + (set_attr "conds" "clob")] +) + +(define_expand "eh_epilogue" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:SI 2 "register_operand" ""))] + "TARGET_EITHER" + " + { + cfun->machine->eh_epilogue_sp_ofs = operands[1]; + if (!REG_P (operands[2]) || REGNO (operands[2]) != 2) + { + rtx ra = gen_rtx_REG (Pmode, 2); + + emit_move_insn (ra, operands[2]); + operands[2] = ra; + } + /* This is a hack -- we may have crystallized the function type too + early. */ + cfun->machine->func_type = 0; + }" +) + +;; This split is only used during output to reduce the number of patterns +;; that need assembler instructions added to them. We allowed the setting +;; of the conditions to be implicit during rtl generation so that +;; the conditional compare patterns would work. However this conflicts to +;; some extent with the conditional data operations, so we have to split them +;; up again here. + +;; ???
Need to audit these splitters for Thumb-2. Why isn't normal +;; conditional execution sufficient? + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_dup 0) + (match_operand 4 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_dup 7) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[7] = gen_rtx_fmt_ee (rc, VOIDmode, operands[5], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)]) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (match_dup 5)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (match_operand:SI 4 "arm_rhs_operand" "") + (not:SI + (match_operand:SI 5 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (not:SI (match_dup 5))))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = 
reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + }" +) + +(define_insn "*cond_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg,multiple") + (set_attr "length" "4,8")] +) + +;; The next two patterns occur when an AND operation is followed by a +;; scc insn sequence + +(define_insn "*sign_extract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"ands\\t%0, %1, %2\", operands); + return \"mvnne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*not_signextract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"tst\\t%1, %2\", operands); + output_asm_insn (\"mvneq\\t%0, #0\", operands); + return \"movne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) +;; ??? The above patterns need auditing for Thumb-2 + +;; Push multiple registers to the stack. Registers are in parallel (use ...) +;; expressions. For simplicity, the first register is also in the unspec +;; part. +;; To avoid the usage of GNU extension, the length attribute is computed +;; in a C function arm_attr_length_push_multi. +(define_insn "*push_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "push_mult_memory_operand" "") + (unspec:BLK [(match_operand:SI 1 "s_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "" + "* + { + int num_saves = XVECLEN (operands[2], 0); + + /* For the StrongARM at least it is faster to + use STR to store only a single register. + In Thumb mode always use push, and the assembler will pick + something appropriate. 
*/ + if (num_saves == 1 && TARGET_ARM) + output_asm_insn (\"str%?\\t%1, [%m0, #-4]!\", operands); + else + { + int i; + char pattern[100]; + + if (TARGET_ARM) + strcpy (pattern, \"stm%(fd%)\\t%m0!, {%1\"); + else if (TARGET_THUMB2) + strcpy (pattern, \"push%?\\t{%1\"); + else + strcpy (pattern, \"push\\t{%1\"); + + for (i = 1; i < num_saves; i++) + { + strcat (pattern, \", %|\"); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[2], 0, i), 0))]); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, operands); + } + + return \"\"; + }" + [(set_attr "type" "store4") + (set (attr "length") + (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))] +) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:SI 0 "s_register_operand" "rk") + (match_operand:SI 1 "s_register_operand" "rk")] + UNSPEC_PRLG_STK))] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] +) + +;; Pop (as used in epilogue RTL) +;; +(define_insn "*load_multiple_with_writeback" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/false, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/true); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Pop with return (as used in epilogue RTL) +;; +;; This instruction is generated when the registers are popped at the end of +;; epilogue. Here, instead of popping the value into LR and then generating +;; jump to LR, value is popped into PC directly. Hence, the pattern is combined +;; with (return). 
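+;; For illustration only (mnemonics and the register list depend on the
+;; target state and the frame laid out by the prologue): an epilogue that
+;; would otherwise end in
+;;   pop {r4, r5, lr}
+;;   bx  lr
+;; can instead be emitted as the single instruction
+;;   pop {r4, r5, pc}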
+(define_insn "*pop_multiple_with_writeback_and_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/true, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/true); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +(define_insn "*pop_multiple_with_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/true, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/false); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Load into PC and return +(define_insn "*ldr_with_return" + [(return) + (set (reg:SI PC_REGNUM) + (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+rk"))))] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "ldr%?\t%|pc, [%0], #4" + [(set_attr "type" "load1") + (set_attr "predicable" "yes")] +) +;; Pop for floating point registers (as used in epilogue RTL) +(define_insn "*vfp_pop_multiple_with_writeback" + [(match_parallel 0 "pop_multiple_fp" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:DF 3 "vfp_hard_register_operand" "") + (mem:DF (match_dup 1)))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { + int num_regs = XVECLEN (operands[0], 0); + char pattern[100]; + rtx op_list[2]; + strcpy (pattern, \"fldmfdd\\t\"); + strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]); + strcat (pattern, \"!, {\"); + op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0); + strcat (pattern, \"%P0\"); + if ((num_regs - 1) > 1) + { + strcat (pattern, \"-%P1\"); + op_list [1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, op_list); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")] +) + +;; Special patterns for dealing with the constant pool + +(define_insn "align_4" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN)] + "TARGET_EITHER" + "* + assemble_align (32); + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "align_8" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN8)] + "TARGET_EITHER" + "* + assemble_align (64); + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)] + "TARGET_EITHER" + "* + making_const_table = FALSE; + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "consttable_1" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_1)] + "TARGET_THUMB1" + "* + making_const_table = TRUE; + assemble_integer (operands[0], 1, BITS_PER_WORD, 1); + assemble_zeros (3); + return \"\"; + " + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_2)] + "TARGET_THUMB1" + "* + making_const_table = 
TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); + assemble_integer (operands[0], 2, BITS_PER_WORD, 1); + assemble_zeros (2); + return \"\"; + " + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_4)] + "TARGET_EITHER" + "* + { + rtx x = operands[0]; + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (x))) + { + case MODE_FLOAT: + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; + default: + /* XXX: Sometimes gcc does something really dumb and ends up with + a HIGH in a constant pool entry, usually because it's trying to + load into a VFP register. We know this will always be used in + combination with a LO_SUM which ignores the high bits, so just + strip off the HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); + break; + } + return \"\"; + }" + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_8)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 8, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "8") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_16" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_16)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 16, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "16") + (set_attr "type" "no_insn")] +) + +;; Miscellaneous Thumb patterns + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "TARGET_THUMB1" + " + if (flag_pic) + { + /* Hopefully, CSE will eliminate this copy. */ + rtx reg1 = copy_addr_to_reg (gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg2, operands[0], reg1)); + operands[0] = reg2; + } + " +) + +;; NB never uses BX. 
+(define_insn "*thumb1_tablejump" + [(set (pc) (match_operand:SI 0 "register_operand" "l*r")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_THUMB1" + "mov\\t%|pc, %0" + [(set_attr "length" "2") + (set_attr "type" "no_insn")] +) + +;; V5 Instructions, + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (clz:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch5" + "clz%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "clz")]) + +(define_insn "rbitsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] + "TARGET_32BIT && arm_arch_thumb2" + "rbit%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "clz")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch_thumb2" + " + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_rbitsi2 (tmp, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmp)); + } + DONE; + " +) + +;; V5E instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" ""))] + "TARGET_32BIT && arm_arch5e" + "pld\\t%a0" + [(set_attr "type" "load1")] +) + +;; General predication pattern + +(define_cond_exec + [(match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") + (const_int 0)])] + "TARGET_32BIT" + "" +[(set_attr "predicated" "yes")] +) + +(define_insn "force_register_use" + [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_REGISTER_USE)] + "" + "%@ %0 needed" + [(set_attr "length" "0") + (set_attr "type" "no_insn")] +) + + +;; Patterns for exception handling + +(define_expand "eh_return" + [(use (match_operand 0 "general_operand" ""))] + "TARGET_EITHER" + " + { + if (TARGET_32BIT) + emit_insn (gen_arm_eh_return (operands[0])); + else + emit_insn (gen_thumb_eh_return (operands[0])); + DONE; + }" +) + +;; We can't expand this before we know where the link register is stored. +(define_insn_and_split "arm_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + arm_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn_and_split "thumb_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "l")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&l"))] + "TARGET_THUMB1" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" + [(set_attr "type" "mov_reg")] +) + + +;; TLS support + +(define_insn "load_tp_hard" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TLS))] + "TARGET_HARD_TP" + "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard" + [(set_attr "predicable" "yes") + (set_attr "type" "mrs")] +) + +;; Doesn't clobber R1-R3. Must use r0 for the first operand. 
+(define_insn "load_tp_soft" + [(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SOFT_TP" + "bl\\t__aeabi_read_tp\\t@ load_tp_soft" + [(set_attr "conds" "clob") + (set_attr "type" "branch")] +) + +;; tls descriptor call +(define_insn "tlscall" + [(set (reg:SI R0_REGNUM) + (unspec:SI [(reg:SI R0_REGNUM) + (match_operand:SI 0 "" "X") + (match_operand 1 "" "")] UNSPEC_TLS)) + (clobber (reg:SI R1_REGNUM)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:SI CC_REGNUM))] + "TARGET_GNU2_TLS" + { + targetm.asm_out.internal_label (asm_out_file, "LPIC", + INTVAL (operands[1])); + return "bl\\t%c0(tlscall)"; + } + [(set_attr "conds" "clob") + (set_attr "length" "4") + (set_attr "type" "branch")] +) + +;; For thread pointer builtin +(define_expand "get_thread_pointersi" + [(match_operand:SI 0 "s_register_operand" "=r")] + "" + " + { + arm_load_tp (operands[0]); + DONE; + }") + +;; + +;; We only care about the lower 16 bits of the constant +;; being inserted into the upper 16 bits of the register. +(define_insn "*arm_movtas_ze" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "const_int_operand" ""))] + "arm_arch_thumb2" + "movt%?\t%0, %L1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") + (set_attr "type" "mov_imm")] +) + +(define_insn "*arm_rev" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "l,l,r")))] + "arm_arch6" + "@ + rev\t%0, %1 + rev%?\t%0, %1 + rev%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_expand "arm_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (xor:SI (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 16)) + (match_dup 1))) + (set (match_dup 2) + (lshiftrt:SI (match_dup 2) + (const_int 8))) + (set (match_operand:SI 3 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (const_int 8))) + (set (match_dup 2) + (and:SI (match_dup 2) + (const_int -65281))) + (set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_dup 3) + (match_dup 2)))] + "TARGET_32BIT" + "" +) + +;; Reuse temporaries to keep register pressure down. 
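+;; As a reference for the legacy (pre-ARMv6) byte-swap expanders, here is a
+;; minimal scalar C sketch of what arm_legacy_rev above computes.  It is an
+;; illustration only; ror32 is a hypothetical helper standing in for the
+;; rotate-right steps and is not part of this port.
+;;
+;;   #include <stdint.h>
+;;
+;;   static inline uint32_t ror32 (uint32_t v, unsigned n)
+;;   { return (v >> n) | (v << (32 - n)); }
+;;
+;;   uint32_t legacy_rev (uint32_t x)
+;;   {
+;;     uint32_t t = (ror32 (x, 16) ^ x) >> 8;  /* the xor and lshiftrt */
+;;     uint32_t y = ror32 (x, 8);              /* the rotatert by 8 */
+;;     t &= 0xffff00ffu;                       /* the (const_int -65281) mask */
+;;     return y ^ t;                           /* final xor: bytes reversed */
+;;   }
+;;
+;; The Thumb variant below computes the same result using only shifts,
+;; rotates and ORs that are available to Thumb code.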
+(define_expand "thumb_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 24))) + (set (match_operand:SI 3 "s_register_operand" "") + (lshiftrt:SI (match_dup 1) + (const_int 24))) + (set (match_dup 3) + (ior:SI (match_dup 3) + (match_dup 2))) + (set (match_operand:SI 4 "s_register_operand" "") + (const_int 16)) + (set (match_operand:SI 5 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (match_dup 4))) + (set (match_dup 2) + (ashift:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (lshiftrt:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (ior:SI (match_dup 5) + (match_dup 2))) + (set (match_dup 5) + (rotatert:SI (match_dup 5) + (match_dup 4))) + (set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_dup 5) + (match_dup 3)))] + "TARGET_THUMB" + "" +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))] +"TARGET_EITHER && (arm_arch6 || !optimize_size)" +" + if (!arm_arch6) + { + rtx op2 = gen_reg_rtx (SImode); + rtx op3 = gen_reg_rtx (SImode); + + if (TARGET_THUMB) + { + rtx op4 = gen_reg_rtx (SImode); + rtx op5 = gen_reg_rtx (SImode); + + emit_insn (gen_thumb_legacy_rev (operands[0], operands[1], + op2, op3, op4, op5)); + } + else + { + emit_insn (gen_arm_legacy_rev (operands[0], operands[1], + op2, op3)); + } + + DONE; + } + " +) + +;; bswap16 patterns: use revsh and rev16 instructions for the signed +;; and unsigned variants, respectively. For rev16, expose +;; byte-swapping in the lower 16 bits only. +(define_insn "*arm_revsh" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (sign_extend:SI (bswap:HI (match_operand:HI 1 "s_register_operand" "l,l,r"))))] + "arm_arch6" + "@ + revsh\t%0, %1 + revsh%?\t%0, %1 + revsh%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_insn "*arm_rev16" + [(set (match_operand:HI 0 "s_register_operand" "=l,l,r") + (bswap:HI (match_operand:HI 1 "s_register_operand" "l,l,r")))] + "arm_arch6" + "@ + rev16\t%0, %1 + rev16%?\t%0, %1 + rev16%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_expand "bswaphi2" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] +"arm_arch6" +"" +) + +;; Patterns for LDRD/STRD in Thumb2 mode + +(define_insn "*thumb2_ldrd" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk") + (match_operand:SI 2 "ldrd_strd_offset_operand" "Do")))) + (set (match_operand:SI 3 "s_register_operand" "=r") + (mem:SI (plus:SI (match_dup 1) + (match_operand:SI 4 "const_int_operand" ""))))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4])) + && (operands_ok_ldrd_strd (operands[0], operands[3], + operands[1], INTVAL (operands[2]), + false, true))" + "ldrd%?\t%0, %3, [%1, %2]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_ldrd_base" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "s_register_operand" "=r") + (mem:SI (plus:SI (match_dup 1) + (const_int 4))))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && 
current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[0], operands[2], + operands[1], 0, false, true))" + "ldrd%?\t%0, %2, [%1]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_ldrd_base_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk") + (const_int -4)))) + (set (match_operand:SI 2 "s_register_operand" "=r") + (mem:SI (match_dup 1)))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[0], operands[2], + operands[1], -4, false, true))" + "ldrd%?\t%0, %2, [%1, #-4]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd" + [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") + (match_operand:SI 1 "ldrd_strd_offset_operand" "Do"))) + (match_operand:SI 2 "s_register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 4 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && ((INTVAL (operands[1]) + 4) == INTVAL (operands[3])) + && (operands_ok_ldrd_strd (operands[2], operands[4], + operands[0], INTVAL (operands[1]), + false, false))" + "strd%?\t%2, %4, [%0, %1]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd_base" + [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk")) + (match_operand:SI 1 "s_register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (const_int 4))) + (match_operand:SI 2 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[1], operands[2], + operands[0], 0, false, false))" + "strd%?\t%1, %2, [%0]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd_base_neg" + [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") + (const_int -4))) + (match_operand:SI 1 "s_register_operand" "r")) + (set (mem:SI (match_dup 0)) + (match_operand:SI 2 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[1], operands[2], + operands[0], -4, false, false))" + "strd%?\t%1, %2, [%0, #-4]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +;; ARMv8 CRC32 instructions. +(define_insn "" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r") + (match_operand: 2 "s_register_operand" "r")] + CRC))] + "TARGET_CRC32" + "\\t%0, %1, %2" + [(set_attr "type" "crc") + (set_attr "conds" "unconditional")] +) + +;; Load the load/store double peephole optimizations. +(include "ldrdstrd.md") + +;; Load the load/store multiple patterns +(include "ldmstm.md") + +;; Patterns in ldmstm.md don't cover more than 4 registers. This pattern covers +;; large lists without explicit writeback generated for APCS_FRAME epilogue. 
+(define_insn "*load_multiple" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/false, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/false); + return \"\"; + } + " + [(set_attr "predicable" "yes")] +) + +;; Vector bits common to IWMMXT and Neon +(include "vec-common.md") +;; Load the Intel Wireless Multimedia Extension patterns +(include "iwmmxt.md") +;; Load the VFP co-processor patterns +(include "vfp.md") +;; Thumb-2 patterns +(include "thumb2.md") +;; Neon patterns +(include "neon.md") +;; Crypto patterns +(include "crypto.md") +;; Synchronization Primitives +(include "sync.md") +;; Fixed-point patterns +(include "arm-fixed.md") diff --git a/gcc-4.9/gcc/config/arm/arm.opt b/gcc-4.9/gcc/config/arm/arm.opt new file mode 100644 index 000000000..d80f1f13b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.opt @@ -0,0 +1,277 @@ +; Options for the ARM port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/arm/arm-opts.h + +Enum +Name(tls_type) Type(enum arm_tls_type) +TLS dialect to use: + +EnumValue +Enum(tls_type) String(gnu) Value(TLS_GNU) + +EnumValue +Enum(tls_type) String(gnu2) Value(TLS_GNU2) + +mabi= +Target RejectNegative Joined Enum(arm_abi_type) Var(arm_abi) Init(ARM_DEFAULT_ABI) +Specify an ABI + +Enum +Name(arm_abi_type) Type(enum arm_abi_type) +Known ARM ABIs (for use with the -mabi= option): + +EnumValue +Enum(arm_abi_type) String(apcs-gnu) Value(ARM_ABI_APCS) + +EnumValue +Enum(arm_abi_type) String(atpcs) Value(ARM_ABI_ATPCS) + +EnumValue +Enum(arm_abi_type) String(aapcs) Value(ARM_ABI_AAPCS) + +EnumValue +Enum(arm_abi_type) String(iwmmxt) Value(ARM_ABI_IWMMXT) + +EnumValue +Enum(arm_abi_type) String(aapcs-linux) Value(ARM_ABI_AAPCS_LINUX) + +mabort-on-noreturn +Target Report Mask(ABORT_NORETURN) +Generate a call to abort if a noreturn function returns + +mapcs +Target RejectNegative Mask(APCS_FRAME) Undocumented + +mapcs-float +Target Report Mask(APCS_FLOAT) +Pass FP arguments in FP registers + +mapcs-frame +Target Report Mask(APCS_FRAME) +Generate APCS conformant stack frames + +mapcs-reentrant +Target Report Mask(APCS_REENT) +Generate re-entrant, PIC code + +mapcs-stack-check +Target Report Mask(APCS_STACK) Undocumented + +march= +Target RejectNegative ToLower Joined Enum(arm_arch) Var(arm_arch_option) +Specify the name of the target architecture + +; Other arm_arch values are loaded from arm-tables.opt +; but that is a generated file and this is an odd-one-out. +EnumValue +Enum(arm_arch) String(native) Value(-1) DriverOnly + +marm +Target Report RejectNegative InverseMask(THUMB) +Generate code in 32 bit ARM state. 
+ +mbig-endian +Target Report RejectNegative Mask(BIG_END) +Assume target CPU is configured as big endian + +mcallee-super-interworking +Target Report Mask(CALLEE_INTERWORKING) +Thumb: Assume non-static functions may be called from ARM code + +mcaller-super-interworking +Target Report Mask(CALLER_INTERWORKING) +Thumb: Assume function pointers may go to non-Thumb aware code + +mcpu= +Target RejectNegative ToLower Joined Enum(processor_type) Var(arm_cpu_option) Init(arm_none) +Specify the name of the target CPU + +mfloat-abi= +Target RejectNegative Joined Enum(float_abi_type) Var(arm_float_abi) Init(TARGET_DEFAULT_FLOAT_ABI) +Specify if floating point hardware should be used + +Enum +Name(float_abi_type) Type(enum float_abi_type) +Known floating-point ABIs (for use with the -mfloat-abi= option): + +EnumValue +Enum(float_abi_type) String(soft) Value(ARM_FLOAT_ABI_SOFT) + +EnumValue +Enum(float_abi_type) String(softfp) Value(ARM_FLOAT_ABI_SOFTFP) + +EnumValue +Enum(float_abi_type) String(hard) Value(ARM_FLOAT_ABI_HARD) + +mfp16-format= +Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_NONE) +Specify the __fp16 floating-point format + +Enum +Name(arm_fp16_format_type) Type(enum arm_fp16_format_type) +Known __fp16 formats (for use with the -mfp16-format= option): + +EnumValue +Enum(arm_fp16_format_type) String(none) Value(ARM_FP16_FORMAT_NONE) + +EnumValue +Enum(arm_fp16_format_type) String(ieee) Value(ARM_FP16_FORMAT_IEEE) + +EnumValue +Enum(arm_fp16_format_type) String(alternative) Value(ARM_FP16_FORMAT_ALTERNATIVE) + +mfpu= +Target RejectNegative Joined Enum(arm_fpu) Var(arm_fpu_index) +Specify the name of the target floating point hardware/format + +mlra +Target Report Var(arm_lra_flag) Init(1) Save +Use LRA instead of reload (transitional) + +mhard-float +Target RejectNegative Alias(mfloat-abi=, hard) Undocumented + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_END) +Assume target CPU is configured as little endian + +mlong-calls +Target Report Mask(LONG_CALLS) +Generate call insns as indirect calls, if necessary + +mpic-data-is-text-relative +Target Report Var(arm_pic_data_is_text_relative) Init(TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE) +Assume data segments are relative to text segment. 
+ +mpic-register= +Target RejectNegative Joined Var(arm_pic_register_string) +Specify the register to be used for PIC addressing + +mpoke-function-name +Target Report Mask(POKE_FUNCTION_NAME) +Store function names in object code + +msched-prolog +Target Report Mask(SCHED_PROLOG) +Permit scheduling of a function's prologue sequence + +msingle-pic-base +Target Report Mask(SINGLE_PIC_BASE) +Do not load the PIC register in function prologues + +msoft-float +Target RejectNegative Alias(mfloat-abi=, soft) Undocumented + +mstructure-size-boundary= +Target RejectNegative Joined UInteger Var(arm_structure_size_boundary) Init(DEFAULT_STRUCTURE_SIZE_BOUNDARY) +Specify the minimum bit alignment of structures + +mthumb +Target Report RejectNegative Mask(THUMB) +Generate code for Thumb state + +mthumb-interwork +Target Report Mask(INTERWORK) +Support calls between Thumb and ARM instruction sets + +mtls-dialect= +Target RejectNegative Joined Enum(tls_type) Var(target_tls_dialect) Init(TLS_GNU) +Specify thread local storage scheme + +mtp= +Target RejectNegative Joined Enum(arm_tp_type) Var(target_thread_pointer) Init(TP_AUTO) +Specify how to access the thread pointer + +Enum +Name(arm_tp_type) Type(enum arm_tp_type) +Valid arguments to -mtp=: + +EnumValue +Enum(arm_tp_type) String(soft) Value(TP_SOFT) + +EnumValue +Enum(arm_tp_type) String(auto) Value(TP_AUTO) + +EnumValue +Enum(arm_tp_type) String(cp15) Value(TP_CP15) + +mtpcs-frame +Target Report Mask(TPCS_FRAME) +Thumb: Generate (non-leaf) stack frames even if not needed + +mtpcs-leaf-frame +Target Report Mask(TPCS_LEAF_FRAME) +Thumb: Generate (leaf) stack frames even if not needed + +mtune= +Target RejectNegative ToLower Joined Enum(processor_type) Var(arm_tune_option) Init(arm_none) +Tune code for the given processor + +; Other processor_type values are loaded from arm-tables.opt +; but that is a generated file and this is an odd-one-out. +EnumValue +Enum(processor_type) String(native) Value(-1) DriverOnly + +mwords-little-endian +Target Report RejectNegative Mask(LITTLE_WORDS) +Assume big endian bytes, little endian words. This option is deprecated. + +mvectorize-with-neon-quad +Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) +Use Neon quad-word (rather than double-word) registers for vectorization + +mvectorize-with-neon-double +Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) +Use Neon double-word (rather than quad-word) registers for vectorization + +mword-relocations +Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) +Only generate absolute relocations on word sized values. + +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + +mold-rtx-costs +Target Report Mask(OLD_RTX_COSTS) +Use the old RTX costing tables (transitional). + +mnew-generic-costs +Target Report Mask(NEW_GENERIC_COSTS) +Use the new generic RTX cost tables if new core-specific cost table not available (transitional). + +mfix-cortex-m3-ldrd +Target Report Var(fix_cm3_ldrd) Init(2) +Avoid overlapping destination and address registers on LDRD instructions +that may trigger Cortex-M3 errata. + +munaligned-access +Target Report Var(unaligned_access) Init(2) +Enable unaligned word and halfword accesses to packed data. + +mneon-for-64bits +Target Report RejectNegative Var(use_neon_for_64bits) Init(0) +Use Neon to perform 64-bits operations rather than core registers. 
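+; Illustration only: these options are normally combined on the driver
+; command line, for example a Thumb-2 hard-float configuration might use
+;   -mthumb -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=hard -mtp=cp15
+; (the cpu and fpu names themselves come from the generated arm-tables.opt
+; and the FPU list, not from this file).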
+ +mslow-flash-data +Target Report Var(target_slow_flash_data) Init(0) +Assume loading data from flash is slower than fetching instructions. diff --git a/gcc-4.9/gcc/config/arm/arm1020e.md b/gcc-4.9/gcc/config/arm/arm1020e.md new file mode 100644 index 000000000..0206ea2af --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1020e.md @@ -0,0 +1,385 @@ +;; ARM 1020E & ARM 1022E Pipeline Description +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1020E Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1020E core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "1020a_e,1020a_m,1020a_w" "arm1020e") +(define_cpu_unit "1020l_e,1020l_m,1020l_w" "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. 
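+;; A note on reading the reservations that follow: the number after each
+;; define_insn_reservation name is the result latency in cycles, and the
+;; reservation string lists the unit usage cycle by cycle, where "," moves
+;; to the next cycle, "unit*N" holds a unit for N consecutive cycles, and
+;; "a+b" uses both units in the same cycle.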
+ +;; ALU operations with no shifted operand +(define_insn_reservation "1020alu_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "1020alu_shift_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "1020alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "1020a_e*2,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. +(define_insn_reservation "1020mult1" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smulxy,smulwy")) + "1020a_e,1020a_m,1020a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "1020mult2" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smlaxy,smlalxy,smlawx")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. +(define_insn_reservation "1020mult3" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smlalxy,mul,mla")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "1020mult4" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "muls,mlas")) + "1020a_e*4,1020a_m,1020a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described.as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. 
+(define_insn_reservation "1020mult5" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "umull,umlal,smull,smlal")) + "1020a_e*3,1020a_m,1020a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "1020mult6" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "1020a_e*5,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "1020load1_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load_byte,load1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store1_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. +;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. +;; +;; The ALU pipeline is decoupled after the first cycle unless there is +;; a register dependency; the dependency is cleared as soon as the LDM/STM +;; has dealt with the corresponding register. So for example, +;; stmia sp, {r0-r3} +;; add r0, r0, #4 +;; will have one fewer stalls than +;; stmia sp, {r0-r3} +;; add r3, r3, #4 +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. 
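+;; (Sharing reservations for 2k and 2k - 1 registers is why load3 and load4,
+;; and likewise store3 and store4, each map onto a single reservation below.)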
+ +(define_insn_reservation "1020load2_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store2_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020load34_op" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load3,load4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +(define_insn_reservation "1020store34_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store3,store4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "1020branch_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "branch")) + "1020a_e") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "1020call_op" 32 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "call")) + "1020a_e*32") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_cpu_unit "v10_fmac" "arm1020e") + +(define_cpu_unit "v10_ds" "arm1020e") + +(define_cpu_unit "v10_fmstat" "arm1020e") + +(define_cpu_unit "v10_ls1,v10_ls2,v10_ls3" "arm1020e") + +;; fmstat is a serializing instruction. It will stall the core until +;; the mac and ds units have completed. +(exclusion_set "v10_fmac,v10_ds" "v10_fmstat") + +(define_attr "vfp10" "yes,no" + (const (if_then_else (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "fpu" "vfp")) + (const_string "yes") (const_string "no")))) + +;; Note, no instruction can issue to the VFP if the core is stalled in the +;; first execute state. We model this by using 1020a_e in the first cycle. 
+(define_insn_reservation "v10_ffarith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_farith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "faddd,fadds")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_cvt" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_cvt,f_cvti2f,f_cvtf2i")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_fmul" 6 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fmuls,fmacs,ffmas,fmuld,fmacd,ffmad")) + "1020a_e+v10_fmac*2") + +(define_insn_reservation "v10_fdivs" 18 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "1020a_e+v10_ds*14") + +(define_insn_reservation "v10_fdivd" 32 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "1020a_e+v10_fmac+v10_ds*28") + +(define_insn_reservation "v10_floads" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loads")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +;; We model a load of a double as needing all the vfp ls* stage in cycle 1. +;; This gives the correct mix between single-and double loads where a flds +;; followed by and fldd will stall for one cycle, but two back-to-back fldd +;; insns stall for two cycles. +(define_insn_reservation "v10_floadd" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loadd")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +;; Moves to/from arm regs also use the load/store pipeline. + +(define_insn_reservation "v10_c2v" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_mcr,f_mcrr")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstores" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stores")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstored" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stored")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +(define_insn_reservation "v10_v2c" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_mrc,f_mrrc")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "v10_to_cpsr" 2 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_flag")) + "1020a_e+v10_fmstat,1020a_e+1020l_e,1020l_m,1020l_w") + +;; VFP bypasses + +;; There are bypasses for most operations other than store + +(define_bypass 3 + "v10_c2v,v10_floads" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd,v10_cvt") + +(define_bypass 4 + "v10_floadd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; Arithmetic to other arithmetic saves a cycle due to forwarding +(define_bypass 4 + "v10_ffarith,v10_farith" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 5 + "v10_fmul" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 17 + "v10_fdivs" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 31 + "v10_fdivd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; VFP anti-dependencies. + +;; There is one anti-dependence in the following case (not yet modelled): +;; - After a store: one extra cycle for both fsts and fstd +;; Note, back-to-back fstd instructions will overload the load/store datapath +;; causing a two-cycle stall. 
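+;; For reference: a define_bypass latency replaces the producer reservation's
+;; default latency for the listed consumers only.  For example, the "3" in
+;; the v10_c2v,v10_floads bypass above means a value loaded by the 4-cycle
+;; v10_floads reservation is ready for a dependent VFP arithmetic operation
+;; one cycle earlier than its default latency.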
diff --git a/gcc-4.9/gcc/config/arm/arm1026ejs.md b/gcc-4.9/gcc/config/arm/arm1026ejs.md new file mode 100644 index 000000000..3f290b475 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1026ejs.md @@ -0,0 +1,250 @@ +;; ARM 1026EJ-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1026EJ-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1026EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "a_e,a_m,a_w" "arm1026ejs") +(define_cpu_unit "l_e,l_m,l_w" "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "alu_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "alu_shift_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. 
+(define_insn_reservation "alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "a_e*2,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. +(define_insn_reservation "mult1" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smulxy,smulwy")) + "a_e,a_m,a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "mult2" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smlaxy,smlalxy,smlawx")) + "a_e*2,a_m,a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. +(define_insn_reservation "mult3" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smlalxy,mul,mla")) + "a_e*2,a_m,a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "mult4" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "muls,mlas")) + "a_e*4,a_m,a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. +(define_insn_reservation "mult5" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "umull,umlal,smull,smlal")) + "a_e*3,a_m,a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "mult6" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "a_e*5,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. 
The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "load1_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load_byte,load1")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store1_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store1")) + "a_e+l_e,l_m,a_w+l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "load1_op" "store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. +;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. +;; +;; The ALU pipeline is stalled until the completion of the last memory +;; stage in the LSU pipeline. That is modeled by keeping the ALU +;; execute stage busy until that point. +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. + +(define_insn_reservation "load2_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store2_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "load34_op" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load3,load4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +(define_insn_reservation "store34_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store3,store4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "branch_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "call_op" 32 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc-4.9/gcc/config/arm/arm1136jfs.md b/gcc-4.9/gcc/config/arm/arm1136jfs.md new file mode 100644 index 000000000..9e941da76 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1136jfs.md @@ -0,0 +1,387 @@ +;; ARM 1136J[F]-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1136JF-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1136J-S and 1136JF-S cores. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1136jfs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are three distinct pipelines (page 1-26 and following): +;; +;; - A 4-stage decode pipeline, shared by all three. It has fetch (1), +;; fetch (2), decode, and issue stages. Since this is always involved, +;; we do not model it in the scheduler. +;; +;; - A 4-stage ALU pipeline. It has shifter, ALU (main integer operations), +;; and saturation stages. The fourth stage is writeback; see below. +;; +;; - A 4-stage multiply-accumulate pipeline. It has three stages, called +;; MAC1 through MAC3, and a fourth writeback stage. +;; +;; The 4th-stage writeback is shared between the ALU and MAC pipelines, +;; which operate in lockstep. Results from either pipeline will be +;; moved into the writeback stage. Because the two pipelines operate +;; in lockstep, we schedule them as a single "execute" pipeline. +;; +;; - A 4-stage LSU pipeline. It has address generation, data cache (1), +;; data cache (2), and writeback stages. (Note that this pipeline, +;; including the writeback stage, is independent from the ALU & LSU pipes.) + +(define_cpu_unit "e_1,e_2,e_3,e_wb" "arm1136jfs") ; ALU and MAC +; e_1 = Sh/Mac1, e_2 = ALU/Mac2, e_3 = SAT/Mac3 +(define_cpu_unit "l_a,l_dc1,l_dc2,l_wb" "arm1136jfs") ; Load/Store + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require eight cycles to execute, and use the ALU +;; pipeline in each of the eight stages. The results are available +;; after the alu stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modelled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "11_alu_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "11_alu_shift_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. 
Pretend we take two cycles in +;; the shift stage. +(define_insn_reservation "11_alu_shift_reg_op" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "e_1*2,e_2,e_3,e_wb") + +;; alu_ops can start sooner, if there is no shifter dependency +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_op") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_op") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the first two execute stages until +;; the instruction has been passed through the multiplier array enough +;; times. + +;; Multiply and multiply-accumulate results are available after four stages. +(define_insn_reservation "11_mult1" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "mul,mla")) + "e_1*2,e_2,e_3,e_wb") + +;; The *S variants set the condition flags, which requires three more cycles. +(define_insn_reservation "11_mult2" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "muls,mlas")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult1,11_mult2" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_op") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Signed and unsigned multiply long results are available across two cycles; +;; the less significant word is available one cycle before the more significant +;; word. Here we conservatively wait until both are available, which is +;; after three iterations and the memory cycle. The same is also true of +;; the two multiply-accumulate instructions. +(define_insn_reservation "11_mult3" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smull,umull,smlal,umlal")) + "e_1*3,e_2,e_3,e_wb*2") + +;; The *S variants set the condition flags, which requires three more cycles. 
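+;;
+;; For illustration only (not taken from the TRM): both "smull r0, r1, r2, r3"
+;; and its flag-setting form are modelled with a result latency of 5 for the
+;; two destination registers; the bypasses that follow let a plain ALU
+;; consumer of either half start one cycle earlier, after 4 cycles.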
+(define_insn_reservation "11_mult4" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smulls,umulls,smlals,umlals")) + "e_1*3,e_2,e_3,e_wb*2") + +(define_bypass 4 "11_mult3,11_mult4" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_op") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Various 16x16->32 multiplies and multiply-accumulates, using combinations +;; of high and low halves of the argument registers. They take a single +;; pass through the pipeline and make the result available after three +;; cycles. +(define_insn_reservation "11_mult5" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\ + smusd,smusdx,smlsd,smlsdx")) + "e_1,e_2,e_3,e_wb") + +(define_bypass 2 "11_mult5" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_mult5" + "11_alu_op") +(define_bypass 2 "11_mult5" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_mult5" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_mult5" + "11_store1" + "arm_no_early_store_addr_dep") + +;; The same idea, then the 32-bit result is added to a 64-bit quantity. +(define_insn_reservation "11_mult6" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smlalxy")) + "e_1*2,e_2,e_3,e_wb*2") + +;; Signed 32x32 multiply, then the most significant 32 bits are extracted +;; and are available after the memory stage. +(define_insn_reservation "11_mult7" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smmul,smmulr")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult6,11_mult7" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_op") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_store1" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; These vary greatly depending on their arguments and the results of +;; stat prediction. Cycle count ranges from zero (unconditional branch, +;; folded dynamic prediction) to seven (incorrect predictions, etc). We +;; assume an optimal case for now, because the cost of a cache miss +;; overwhelms the cost of everything else anyhow. + +(define_insn_reservation "11_branches" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "branch")) + "nothing") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "11_call" 32 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "call")) + "nothing") + +;; Branches are predicted. A correctly predicted branch will be no +;; cost, but we're conservative here, and use the timings a +;; late-register would give us. 
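+;;
+;; For illustration only (not taken from the TRM): a branch that depends on
+;; the result of the immediately preceding ALU operation is modelled as
+;; seeing that result after one cycle; after two cycles if the producer
+;; used a register-specified shift or was a single-word or two-register
+;; load, and after three cycles for a load of three or more registers.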
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_branches") +(define_bypass 2 "11_alu_shift_reg_op" + "11_branches") +(define_bypass 2 "11_load1,11_load2" + "11_branches") +(define_bypass 3 "11_load34" + "11_branches") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback. +;; These models assume that all memory references hit in dcache. Also, +;; if the PC is one of the registers involved, there are additional stalls +;; not modelled here. Addressing modes are also not modelled. + +(define_insn_reservation "11_load1" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load byte results are not available until the writeback stage, where +;; the correct byte is extracted. + +(define_insn_reservation "11_loadb" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load_byte")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store1" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store double words into adjacent registers. The timing and +;; latencies are different depending on whether the address is 64-bit +;; aligned. This model assumes that it is. +(define_insn_reservation "11_load2" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store2" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store multiple registers. Two registers are stored per cycle. +;; Actual timing depends on how many registers are affected, so we +;; optimistically schedule a low latency. +(define_insn_reservation "11_load34" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load3,load4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +(define_insn_reservation "11_store34" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store3,store4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +;; A store can start immediately after an alu op, if that alu op does +;; not provide part of the address to access. 
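+;;
+;; For illustration only (not taken from the TRM): in "add r0, r0, #1"
+;; followed by "str r0, [r1]" the store depends on r0 only as the data to
+;; be stored, so the bypass below lets the store start on the next cycle;
+;; had the add produced the base address r1 instead, the full 2-cycle ALU
+;; latency (3 cycles for a register-shifted operation) would apply.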
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_store1" + "arm_no_early_store_addr_dep") + +;; An alu op can start sooner after a load, if that alu op does not +;; have an early register dependency on the load +(define_bypass 2 "11_load1" + "11_alu_op") +(define_bypass 2 "11_load1" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_load1" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 3 "11_loadb" + "11_alu_op") +(define_bypass 3 "11_loadb" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_loadb" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +;; A mul op can start sooner after a load, if that mul op does not +;; have an early multiply dependency +(define_bypass 2 "11_load1" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_load34" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_loadb" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;; A store can start sooner after a load, if that load does not +;; produce part of the address to access +(define_bypass 2 "11_load1" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 3 "11_loadb" + "11_store1" + "arm_no_early_store_addr_dep") diff --git a/gcc-4.9/gcc/config/arm/arm926ejs.md b/gcc-4.9/gcc/config/arm/arm926ejs.md new file mode 100644 index 000000000..883935dcf --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm926ejs.md @@ -0,0 +1,198 @@ +;; ARM 926EJ-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 926EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. 
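+;;
+;; Note (an observation about this description, not taken from the TRM):
+;; unlike the 1026EJ-S and 1136J(F)-S models, no bypasses are defined for
+;; this core, so the scheduler uses the result latencies of the
+;; reservations below directly.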
+ +(define_cpu_unit "e,m,w" "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "9_alu_op" 1 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + shift_imm,shift_reg,extend,\ + mov_imm,mov_reg,mov_shift,\ + mvn_imm,mvn_reg,mvn_shift,\ + multiple,no_insn")) + "e,m,w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "9_alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. Multiply operations occur in both the execute and memory +;; stages of the pipeline + +(define_insn_reservation "9_mult1" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smlalxy,mul,mla")) + "e*2,m,w") + +(define_insn_reservation "9_mult2" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "muls,mlas")) + "e*3,m,w") + +(define_insn_reservation "9_mult3" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "umull,umlal,smull,smlal")) + "e*3,m,w") + +(define_insn_reservation "9_mult4" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "e*4,m,w") + +(define_insn_reservation "9_mult5" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smulxy,smlaxy,smlawx")) + "e,m,w") + +(define_insn_reservation "9_mult6" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smlalxy")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; Loads with a shifted offset take 3 cycles, and are (a) probably the +;; most common and (b) the pessimistic assumption will lead to fewer stalls. 
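+;;
+;; For illustration only (not taken from the TRM): a shifted-offset load such
+;; as "ldr r0, [r1, r2, lsl #2]" is therefore modelled with a result latency
+;; of 3 cycles, so the scheduler tries to place instructions that read r0 at
+;; least three cycles later.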
+(define_insn_reservation "9_load1_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load1,load_byte")) + "e*2,m,w") + +(define_insn_reservation "9_store1_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store1")) + "e,m,w") + +;; multiple word loads and stores +(define_insn_reservation "9_load2_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load2")) + "e,m*2,w") + +(define_insn_reservation "9_load3_op" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load3")) + "e,m*3,w") + +(define_insn_reservation "9_load4_op" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load4")) + "e,m*4,w") + +(define_insn_reservation "9_store2_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store2")) + "e,m*2,w") + +(define_insn_reservation "9_store3_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store3")) + "e,m*3,w") + +(define_insn_reservation "9_store4_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store4")) + "e,m*4,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "9_branch_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "9_call_op" 32 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc-4.9/gcc/config/arm/arm_acle.h b/gcc-4.9/gcc/config/arm/arm_acle.h new file mode 100644 index 000000000..aaa7affee --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_acle.h @@ -0,0 +1,100 @@ +/* ARM Non-NEON ACLE intrinsics include file. + + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _GCC_ARM_ACLE_H +#define _GCC_ARM_ACLE_H + +#include +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __ARM_FEATURE_CRC32 +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32b (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32b (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32h (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32h (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32w (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32w (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32d (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32w (__crc32w (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cb (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32cb (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32ch (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32ch (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cw (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32cw (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cd (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32cw (__crc32cw (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h new file mode 100644 index 000000000..37a6e611b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_neon.h @@ -0,0 +1,13429 @@ +/* ARM NEON intrinsics include file. This file is generated automatically + using neon-gen.ml. Please do not edit manually. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_ARM_NEON_H +#define _GCC_ARM_NEON_H 1 + +#ifndef __ARM_NEON__ +#error You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use arm_neon.h +#else + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_di int64x1_t; +typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x1_t; +#endif +typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_udi uint64x1_t; +typedef __builtin_neon_qi int8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_hi int16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_si int32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_di int64x2_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16))); +#endif +typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); + +typedef float float32_t; +typedef __builtin_neon_poly8 poly8_t; +typedef __builtin_neon_poly16 poly16_t; +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64_t; +typedef __builtin_neon_poly128 poly128_t; +#endif + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct 
poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x2_t +{ + poly64x1_t val[2]; +} poly64x1x2_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x2_t +{ + poly64x2_t val[2]; +} poly64x2x2_t; +#endif + + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x3_t +{ + poly64x1_t val[3]; +} poly64x1x3_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x3_t +{ + poly64x2_t val[3]; +} poly64x2x3_t; +#endif + + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef 
struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x4_t +{ + poly64x1_t val[4]; +} poly64x1x4_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x4_t +{ + poly64x2_t val[4]; +} poly64x2x4_t; +#endif + + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vaddq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return 
(uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhadd_u8 
(uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 5); +} + +__extension__ static 
__inline uint8x16_t __attribute__ ((__always_inline__)) +vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqadddi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return 
(uint32x4_t)__builtin_neon_vqaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vraddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vraddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vraddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmul_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
+vmul_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmulq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmulq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmulq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (poly8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 5); +} + +__extension__ static __inline 
int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmullv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vmullv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_u32 (uint32x2_t 
__a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) +vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) +vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlslv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlslv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlslv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1); +} + +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c, 3); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintnv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqn_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintnv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintav2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqa_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintav4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintpv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqp_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintpv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ 
static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintmv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqm_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintmv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintzv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintzv4sf (__a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsub_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 
+vsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsublv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsublv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsublv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsubwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1); +} + 
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqsubdi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return 
(uint64x1_t)__builtin_neon_vqsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
+vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b, 0); 
+} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1); 
+} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return 
(uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ 
+ return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_p8 (poly8x8_t __a, 
poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vabd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vabdv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vabd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vabdv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vabd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vabdv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabdq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabdq_f32 
(float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabdq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vabdv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vabdlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vabav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vabav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vabav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vabav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vabalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return 
(uint16x4_t)__builtin_neon_vmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmaxq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmaxv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmaxv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmaxv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vminq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vminq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vminq_s32 (int32x4_t __a, int32x4_t __b) 
+{ + return (int32x4_t)__builtin_neon_vminv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vminv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vminv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vminq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vminv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vminv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vpaddlv2si (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vpaddlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpaddl_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vpaddlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vpaddlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddlq_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1); +} + +__extension__ static __inline 
int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vpaddlv4si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vpaddlv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vpaddlv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vpaddlv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t __a, int8x8_t __b) +{ + return (int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t __a, int16x4_t __b) +{ + return (int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t __a, int32x2_t __b) +{ + return (int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t __a, uint8x8_t __b) +{ + return (uint16x4_t)__builtin_neon_vpadalv8qi ((int16x4_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t __a, uint16x4_t __b) +{ + return (uint32x2_t)__builtin_neon_vpadalv4hi ((int32x2_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t __a, uint32x2_t __b) +{ + return (uint64x1_t)__builtin_neon_vpadalv2si ((int64x1_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpadalq_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpadalq_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpadalq_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t)__builtin_neon_vpadalv16qi ((int16x8_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t)__builtin_neon_vpadalv8hi ((int32x4_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t)__builtin_neon_vpadalv4si ((int64x2_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ 
((__always_inline__)) +vpmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 3); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1); +} + +__extension__ static __inline 
int16x4_t __attribute__ ((__always_inline__)) +vshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vshldi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return 
(int64x1_t)__builtin_neon_vshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 0); +} + 
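As a point of reference for the register-shift intrinsics above, here is a minimal usage sketch (an editor's illustration, not part of the patch), assuming a NEON-enabled compile (e.g. -mfpu=neon): vshl_* shifts each lane of its first operand left by the signed per-lane count in the second operand, negative counts shift right, the 'r' forms round, and the 'q' forms saturate.

#include <arm_neon.h>

/* Per-lane variable shift: each lane of 'values' is shifted by the signed
   count in the matching lane of 'exponents'; negative counts shift right.
   The saturating 'q' form clamps on overflow instead of wrapping.  */
static inline int16x4_t
scale_by_exponents (int16x4_t values, int16x4_t exponents)
{
  return vqshl_s16 (values, exponents);
}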
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline 
int8x16_t __attribute__ ((__always_inline__)) +vqrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 
+vrshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) +vqshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 5); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 5); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 5); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1); +} + +__extension__ 
static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vqshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) +vqshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshl_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshlu_n_s8 (int8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshlu_n_s16 (int16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshlu_n_s32 (int32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshlu_n_s64 (int64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshluq_n_s8 (int8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshluq_n_s16 (int16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1); +} + +__extension__ 
static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshluq_n_s32 (int32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshluq_n_s64 (int64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_n_s8 (int8x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_n_s16 (int16x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_n_s32 (int32x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshll_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshll_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshll_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1); +} + 
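A similar sketch for the constant-shift forms defined above (again an editor's illustration, not part of the patch): vshl_n_*/vshr_n_* take a compile-time constant count, and vsra_n_* additionally accumulates the shifted value into its first operand, which is convenient for fixed-point averaging.

#include <arm_neon.h>

/* acc += (sample >> 4) per lane; the shift count must be a constant,
   in the range 1..16 for the 16-bit variant.  */
static inline uint16x4_t
accumulate_scaled (uint16x4_t acc, uint16x4_t sample)
{
  return vsra_n_u16 (acc, sample, 4);
}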
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, 
__c, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 4); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsri_ndi 
((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsli_n_s16 
(int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vabsv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vabsv2sf (__a, 3); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vabsv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vabsv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqabsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqabsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vnegv4hi (__a, 1); +} + 
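+/* Illustrative usage sketch: the shift-and-insert intrinsics defined above
+   (vsri_n_*/vsli_n_* and their q-forms) merge bit-fields from two vectors.
+   Assuming a NEON-enabled build, the helper below packs a 4-bit field from
+   __hi into the top of every byte of __lo, i.e. per lane
+   (__hi << 4) | (__lo & 0x0f).  The name pack_nibbles_example is a
+   hypothetical illustration, not a NEON intrinsic.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+pack_nibbles_example (uint8x16_t __lo, uint8x16_t __hi)
+{
+  return vsliq_n_u8 (__lo, __hi, 4);
+}
+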
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vnegv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vnegv2sf (__a, 3); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vnegv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vnegv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqnegv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqnegv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vmvnv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmvnv8hi (__a, 1); +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmvnv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclzv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclzv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclzv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclzv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclzv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclzv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) 
__a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vcntv8qi (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vcntv16qi (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrecpev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrecpev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vget_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vget_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vget_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int 
__b) +{ + return (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vget_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vget_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vget_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanedi (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vgetq_lane_s8 (int8x16_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vgetq_lane_s16 (int16x8_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vgetq_lane_s32 (int32x4_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) +{ + return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vgetq_lane_u8 (uint8x16_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vgetq_lane_u16 (uint16x8_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vgetq_lane_u32 (uint32x4_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ 
((__always_inline__)) +vgetq_lane_u64 (uint64x2_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_f32 
(float32_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vcreate_p64 (uint64_t __a) +{ + return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcreate_s8 (uint64_t __a) +{ + return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcreate_s16 (uint64_t __a) +{ + return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcreate_s32 (uint64_t __a) +{ + return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vcreate_s64 (uint64_t __a) +{ + return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcreate_f32 (uint64_t __a) +{ + return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcreate_u8 (uint64_t __a) +{ + return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcreate_u16 (uint64_t __a) +{ + return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcreate_u32 (uint64_t __a) +{ + return (uint32x2_t)__builtin_neon_vcreatev2si 
((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcreate_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcreate_p8 (uint64_t __a) +{ + return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vcreate_p16 (uint64_t __a) +{ + return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_n_p64 (poly64_t __a) +{ + return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_n_p64 (poly64_t __a) +{ + return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + 
return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmov_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmov_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmov_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmov_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmov_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmov_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmov_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t __a) +{ + return 
(uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovq_n_s32 (int32_t __a) +{ + return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x8_t 
__attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) 
+vcombine_p64 (poly64x1_t __a, poly64x1_t __b) +{ + return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcombine_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcombine_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcombine_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcombine_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcombine_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcombine_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcombine_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcombine_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcombine_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcombine_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vcombine_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_high_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_highv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_highv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_highv4si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_highv2di (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_highv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 
(uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_lowv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_lowv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_lowv4si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_lowv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_low_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_lowv2di (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vcvtv2sf (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si (__a, 1); +} + 
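+/* Illustrative usage sketch: vget_low_*/vget_high_* split a 128-bit vector
+   into its 64-bit halves and vcombine_* joins two halves (first argument
+   becomes the low half).  Assuming a NEON-enabled build, the helper below
+   swaps the halves of a q-register operand.  The name swap_halves_example
+   is a hypothetical illustration, not a NEON intrinsic.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+swap_halves_example (uint8x16_t __a)
+{
+  return vcombine_u8 (vget_high_u8 (__a), vget_low_u8 (__a));
+}
+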
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); +} + +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_f32 (float32x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); +} + +#endif +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_f32_f16 (float16x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); +} + +#endif +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_n_s32_f32 (float32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_s32 (int32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_u32 (uint32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_n_u32_f32 (float32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_s32_f32 (float32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_s32 (int32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_u32 (uint32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_u32_f32 (float32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t __a) +{ + return 
(int32x2_t)__builtin_neon_vmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqmovn_s64 (int64x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovun_s16 (int16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovun_s32 (int32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovun_s64 (int64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_s8 (int8x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_s16 (int16x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_s32 (int32x2_t __a) +{ + return (int64x2_t)__builtin_neon_vmovlv2si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_u8 (uint8x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmovlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_u16 (uint16x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmovlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_u32 (uint32x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vmovlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtbl1v8qi 
((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t __a, uint8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t __a, int8x8_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t __a, int8x8_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t __a, int8x8_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); 
+} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return 
(uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlal_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlal_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return 
(float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlsl_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlsl_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmull_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vmull_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t 
__attribute__ ((__always_inline__)) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t __a, float32_t __b) +{ + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t __a, float32_t __b) +{ + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t __a, 
uint16_t __b) +{ + return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t __a, uint32_t __b) +{ + return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint32x4_t)__builtin_neon_vmull_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint64x2_t)__builtin_neon_vmull_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, 
(__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlal_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlal_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, 
int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsl_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlsl_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vext_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vext_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vext_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vext_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t __a) +{ + return (float32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t __a) +{ + return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t __a) +{ + return (uint32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 
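+/* Reverse the eight poly8 lanes within the 64-bit vector; as with the other
+   vrev64 variants, the constant shuffle mask encodes the reversal of element
+   order inside each 64-bit doubleword.  */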
+vrev64_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t __a) +{ + return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t __a) +{ + return (float32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t __a) +{ + return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t __a) +{ + return (uint32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t __a) +{ + return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t __a) +{ + return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t __a) +{ + return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 
9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t __a) +{ + return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t __a) +{ + return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) +{ + return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +{ + return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +{ + return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +{ + return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) +{ + return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbslq_u32 
(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +{ + return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +{ + return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); 
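+  /* With only two lanes per vector the transpose reduces to pairing lane 0
+     of each input in val[0] and lane 1 of each input in val[1], as the
+     shuffle masks { 0, 2 } and { 1, 3 } spell out.  */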
+ __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline 
poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vzip_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vzip_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vzip_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vzip_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vzip_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vzip_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vzip_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vzip_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vzip_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vzipq_s8 (int8x16_t __a, 
int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vzipq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vzipq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vzipq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vzipq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vzipq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vzipq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vzipq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vzipq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vuzp_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) 
{ 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vuzp_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vuzp_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vuzp_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vuzp_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vuzp_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vuzp_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vuzp_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vuzp_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 
8, 10, 12, 14 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t * __a) +{ + return 
(int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t * __a) +{ + return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ 
((__always_inline__)) +vld1q_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vld1_lanedi 
((const __builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const 
__builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * __a) +{ + return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_dup_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * __a) +{ + return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const 
__builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p64 (poly64_t * __a, poly64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t * __a, int8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t * __a, int16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t * __a, int32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t * __a, int64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t * __a, float32x2_t __b) +{ + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t * __a, uint8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t * __a, uint16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t * __a, uint32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t * __a, uint64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t * __a, poly8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t * __a, poly16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p64 (poly64_t * __a, poly64x2_t __b) +{ + 
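  /* Store the full 128-bit Q register to *__a.  The pointer is cast to the
     builtin's element type (__builtin_neon_di) and the poly64x2_t value to
     the signed vector type (int64x2_t) the builtin signature expects;
     neither cast changes the bits that get stored.  */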
__builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t * __a, int8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t * __a, int16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t * __a, int32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t * __a, int64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t * __a, float32x4_t __b) +{ + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t * __a, uint8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t * __a, uint16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t * __a, uint32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t * __a, uint64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t * __a, poly8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t * __a, poly16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u32 
(uint32_t * __a, uint32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline void 
__attribute__ ((__always_inline__)) +vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; 
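 /* The second union member below, __builtin_neon_ti, is the opaque 128-bit
    integer mode in which the vld2 builtin returns its pair of 64-bit D
    registers; the union reinterprets that pair as the public
    uint64x1x2_t structure without any data movement.  */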
__builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vld2q_s8 (const int8_t * __a) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_s16 (const int16_t * __a) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_s32 (const int32_t * __a) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_f32 (const float32_t * __a) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vld2q_u8 (const uint8_t * __a) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_u16 (const uint16_t * __a) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_u32 (const uint32_t * __a) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vld2q_p8 (const poly8_t * __a) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_p16 (const poly16_t * __a) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = 
{ __b }; + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; 
__builtin_neon_oi __o; } __bu = { __b }; + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = 
__builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s8 (int8_t * __a, int8x8x2_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s16 (int16_t * __a, int16x4x2_t __b) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s32 (int32_t * __a, int32x2x2_t __b) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f32 (float32_t * __a, float32x2x2_t __b) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u8 (uint8_t * __a, uint8x8x2_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u16 (uint16_t * __a, uint16x4x2_t __b) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u32 (uint32_t * __a, uint32x2x2_t __b) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p8 (poly8_t * __a, poly8x8x2_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p16 (poly16_t * __a, poly16x4x2_t __b) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef 
__ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p64 (poly64_t * __a, poly64x1x2_t __b) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s64 (int64_t * __a, int64x1x2_t __b) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u64 (uint64_t * __a, uint64x1x2_t __b) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s8 (int8_t * __a, int8x16x2_t __b) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s16 (int16_t * __a, int16x8x2_t __b) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s32 (int32_t * __a, int32x4x2_t __b) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f32 (float32_t * __a, float32x4x2_t __b) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u8 (uint8_t * __a, uint8x16x2_t __b) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u16 (uint16_t * __a, uint16x8x2_t __b) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u32 (uint32_t * __a, uint32x4x2_t __b) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p8 (poly8_t * __a, poly8x16x2_t __b) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p16 (poly16_t * __a, poly16x8x2_t __b) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c) +{ + 
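  /* Marshal the two-vector argument through a union: the builtin takes its
     data operand in the opaque __builtin_neon_ti mode (a pair of D
     registers), so __b is reinterpreted rather than converted.  Lane __c is
     then stored from each of the two vectors, writing one interleaved
     element pair to *__a.  */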
union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + 
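  /* For the q forms the pair of Q registers travels through the opaque
     256-bit __builtin_neon_oi mode instead.  The call below stores lane __c
     of each of the two uint32x4_t vectors; __c must be a constant
     expression in the range 0..3.  */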
__builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = 
__builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) +vld3q_s8 (const int8_t * __a) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_s16 (const int16_t * __a) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_s32 (const int32_t * __a) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_f32 (const float32_t * __a) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) +vld3q_u8 (const uint8_t * __a) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_u16 (const uint16_t * __a) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_u32 (const uint32_t * __a) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) +vld3q_p8 (const poly8_t * __a) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_p16 (const poly16_t * __a) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int32x2x3_t __i; 
__builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { 
float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + 
return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s8 (int8_t * __a, int8x8x3_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s16 (int16_t * __a, int16x4x3_t __b) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s32 (int32_t * __a, int32x2x3_t __b) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f32 (float32_t * __a, float32x2x3_t __b) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u8 (uint8_t * __a, uint8x8x3_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u16 (uint16_t * __a, uint16x4x3_t __b) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u32 (uint32_t * __a, uint32x2x3_t __b) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p8 (poly8_t * __a, poly8x8x3_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p16 (poly16_t * __a, poly16x4x3_t __b) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void 
__attribute__ ((__always_inline__)) +vst3_p64 (poly64_t * __a, poly64x1x3_t __b) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s64 (int64_t * __a, int64x1x3_t __b) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u64 (uint64_t * __a, uint64x1x3_t __b) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s8 (int8_t * __a, int8x16x3_t __b) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s16 (int16_t * __a, int16x8x3_t __b) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s32 (int32_t * __a, int32x4x3_t __b) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f32 (float32_t * __a, float32x4x3_t __b) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u8 (uint8_t * __a, uint8x16x3_t __b) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u16 (uint16_t * __a, uint16x8x3_t __b) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u32 (uint32_t * __a, uint32x4x3_t __b) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p8 (poly8_t * __a, poly8x16x3_t __b) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p16 (poly16_t * __a, poly16x8x3_t __b) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { 
__b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + 
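The vld3*/vst3* intrinsics defined above load and store three-vector structures, deinterleaving element triplets from memory into the .val[0..2] members and re-interleaving them on the way back out. A minimal usage sketch follows (editor's illustration, not part of this patch; swap_rb_rgb48 is a hypothetical helper name, and the buffer is assumed to hold at least 48 bytes of interleaved R,G,B data):

#include <arm_neon.h>

/* Swap the R and B channels of 16 interleaved RGB pixels.
   vld3q_u8 deinterleaves 48 bytes into three 16-lane vectors
   (one per channel); vst3q_u8 re-interleaves them when storing.  */
void swap_rb_rgb48 (uint8_t *rgb)
{
  uint8x16x3_t px = vld3q_u8 (rgb);   /* px.val[0]=R, val[1]=G, val[2]=B */
  uint8x16_t tmp = px.val[0];
  px.val[0] = px.val[2];
  px.val[2] = tmp;
  vst3q_u8 (rgb, px);
}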
+__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_s64 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + 
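The vld4*/vst4* intrinsics that begin here work the same way for four-element structures such as interleaved RGBA pixels. A small sketch of typical use (illustrative only, not part of this patch; set_opaque_rgba8 is a hypothetical name, the buffer is assumed to hold 32 bytes of interleaved R,G,B,A data, and vdup_n_u8/vst4_u8 are taken from elsewhere in this header):

#include <arm_neon.h>

/* Force the alpha channel of 8 interleaved RGBA pixels to 0xFF.
   vld4_u8 splits 32 bytes into four 8-lane vectors, one per channel.  */
void set_opaque_rgba8 (uint8_t *rgba)
{
  uint8x8x4_t px = vld4_u8 (rgba);   /* px.val[0..3] = R, G, B, A planes */
  px.val[3] = vdup_n_u8 (0xFF);      /* replace the alpha plane */
  vst4_u8 (rgba, px);                /* re-interleave and store */
}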
+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) +vld4q_s8 (const int8_t * __a) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_s16 (const int16_t * __a) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_s32 (const int32_t * __a) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_f32 (const float32_t * __a) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) +vld4q_u8 (const uint8_t * __a) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_u16 (const uint16_t * __a) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_u32 (const uint32_t * __a) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) +vld4q_p8 (const poly8_t * __a) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_p16 (const poly16_t * __a) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const 
__builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = 
__builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline 
poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_s64 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s8 (int8_t * __a, int8x8x4_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s16 (int16_t * __a, int16x4x4_t __b) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s32 (int32_t * __a, int32x2x4_t __b) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f32 (float32_t * __a, float32x2x4_t __b) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u8 (uint8_t * __a, uint8x8x4_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u16 (uint16_t * __a, uint16x4x4_t __b) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u32 (uint32_t * __a, uint32x2x4_t __b) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p8 (poly8_t * __a, poly8x8x4_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p16 (poly16_t * __a, poly16x4x4_t __b) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p64 (poly64_t * __a, 
poly64x1x4_t __b) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s64 (int64_t * __a, int64x1x4_t __b) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u64 (uint64_t * __a, uint64x1x4_t __b) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s8 (int8_t * __a, int8x16x4_t __b) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s16 (int16_t * __a, int16x8x4_t __b) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s32 (int32_t * __a, int32x4x4_t __b) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f32 (float32_t * __a, float32x4x4_t __b) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u8 (uint8_t * __a, uint8x16x4_t __b) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u16 (uint16_t * __a, uint16x8x4_t __b) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u32 (uint32_t * __a, uint32x4x4_t __b) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p8 (poly8_t * __a, poly8x16x4_t __b) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p16 (poly16_t * __a, poly16x8x4_t __b) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, 
__bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) 
+vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vand_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vand_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vand_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vand_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vand_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vand_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vand_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vand_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vandq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vandq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vandq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vandq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vandq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vandq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vandq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vandq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorr_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vorrv8qi 
(__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorr_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorr_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorr_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorr_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorr_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorr_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorr_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vorrq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vorrq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vorrq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vorrq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vorrq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vorrq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vorrq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vorrq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +veor_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +veor_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +veor_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_veorv2si (__a, 
__b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +veor_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +veor_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +veor_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +veor_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +veor_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +veorq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +veorq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +veorq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +veorq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +veorq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +veorq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +veorq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +veorq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbic_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbic_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbic_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbic_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbic_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return 
(uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbic_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbic_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbic_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbicq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbicq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbicq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbicq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbicq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbicq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbicq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbicq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorn_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorn_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorn_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 
+vorn_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorn_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vornq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vornq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vornq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vornq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vornq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vornq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vornq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vornq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p16 (poly16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f32 (float32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p64 (poly64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s64 (int64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u64 (uint64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s8 (int8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s16 (int16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s32 (int32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u8 
(uint8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u16 (uint16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p64 (poly64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s8 (int8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s16 (int16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s32 (int32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u8 (uint8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u16 (uint16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u32 (uint32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p64 (poly64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); +} + +#endif +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 
+vreinterpret_f32_u64 (uint64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s8 (int8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s16 (int16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u8 (uint8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u16 (uint16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p8 (poly8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p16 (poly16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_f32 (float32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s64 (int64x1_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u64 (uint64x1_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s8 (int8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s16 (int16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s32 (int32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u8 (uint8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u16 (uint16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi 
((int16x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u32 (uint32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p64 (poly64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s8 (int8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s16 (int16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u8 (uint8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u16 (uint16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u32 (uint32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p64 (poly64x1_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) +{ + return 
(uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p64 (poly64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ 
static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p64 (poly64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p64 (poly64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +#endif +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p64 (poly64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p64 (poly64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
+vreinterpret_u16_u64 (uint64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p8 (poly8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p64 (poly64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +#endif +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 
(float32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p64 (poly64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p128 (poly128_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p64 (poly64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p128 (poly128_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 (int64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return 
(poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p64 (poly64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p128 (poly128_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO 
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p8 (poly8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p16 (poly16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_f32 (float32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p128 (poly128_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s64 (int64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u64 (uint64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s8 (int8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s16 (int16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s32 (int32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u8 (uint8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u16 (uint16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u32 (uint32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p8 (poly8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p16 (poly16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_f32 (float32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t 
__attribute__ ((__always_inline__)) +vreinterpretq_p128_p64 (poly64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s64 (int64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u64 (uint64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s8 (int8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s16 (int16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s32 (int32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u8 (uint8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u16 (uint16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u32 (uint32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si ((int32x4_t) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p64 (poly64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p128 (poly128_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p64 (poly64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p128 (poly128_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ 
static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p64 (poly64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p128 (poly128_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p64 (poly64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p128 (poly128_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p8 (poly8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p16 (poly16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p64 (poly64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p128 (poly128_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 (uint8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u16 (uint16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a); +} + +__extension__ static 
__inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p8 (poly8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p16 (poly16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f32 (float32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p64 (poly64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p128 (poly128_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s64 (int64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u64 (uint64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s8 (int8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s16 (int16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p64 (poly64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p128 (poly128_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s8 (int8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s16 (int16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s32 (int32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u32 (uint32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p64 (poly64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p128 (poly128_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + 
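[Editorial aside, not part of the GCC header: the vreinterpret/vreinterpretq intrinsics defined throughout this section only relabel a 64-bit or 128-bit register value under a different element type; no lanes are converted, widened, or reordered. A minimal usage sketch follows, assuming a translation unit that includes <arm_neon.h> and is built with NEON enabled; the helper name bswap32_lanes is hypothetical.]

#include <arm_neon.h>

/* Byte-swap every 32-bit lane of a vector: view the 128 bits as 16 bytes,
   reverse the bytes within each 32-bit group, then view the result as four
   32-bit lanes again.  Only the vrev32q_u8 step touches the data; the two
   vreinterpretq calls cost nothing at run time.  */
static inline uint32x4_t
bswap32_lanes (uint32x4_t x)
{
  uint8x16_t bytes = vreinterpretq_u8_u32 (x);  /* same bits, new element type */
  bytes = vrev32q_u8 (bytes);                   /* reverse bytes per 32-bit lane */
  return vreinterpretq_u32_u8 (bytes);          /* back to four uint32_t lanes */
}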
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + + +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) +{ +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif +} + +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If either half of the result is all zeros, then the whole result is all zeros. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in a way similar to vceq_p64 above, but here we do + a reduction with max, since if any pair of corresponding bits + in the two poly64_t values are both set, then the whole result must be all ones.
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) +{ + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) +{ + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha256su1 (__tw0_3, 
__w8_11, __w12_15); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif +#ifdef __cplusplus +} +#endif +#endif +#endif diff --git a/gcc-4.9/gcc/config/arm/arm_neon_builtins.def b/gcc-4.9/gcc/config/arm/arm_neon_builtins.def new file mode 100644 index 000000000..a00951ab6 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_neon_builtins.def @@ -0,0 +1,212 @@ +/* NEON builtin definitions for ARM. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +VAR10 (BINOP, vadd, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), +VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), +VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), +VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), +VAR2 (TERNOP, vfma, v2sf, v4sf), +VAR2 (TERNOP, vfms, v2sf, v4sf), +VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), +VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), +VAR2 (TERNOP, vqdmlal, v4hi, v2si), +VAR2 (TERNOP, vqdmlsl, v4hi, v2si), +VAR3 (BINOP, vmull, v8qi, v4hi, v2si), +VAR2 (SCALARMULL, vmull_n, v4hi, v2si), +VAR2 (LANEMULL, vmull_lane, v4hi, v2si), +VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), +VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), +VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), +VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), +VAR2 (BINOP, vqdmull, v4hi, v2si), +VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), +VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), 
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), +VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), +VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (BINOP, vcage, v2sf, v4sf), +VAR2 (BINOP, vcagt, v2sf, v4sf), +VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), +VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), +VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), +VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), +VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), +VAR2 (BINOP, vrecps, v2sf, v4sf), +VAR2 (BINOP, vrsqrts, v2sf, v4sf), +VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (UNOP, vcnt, v8qi, v16qi), +VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), +VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), +VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + /* FIXME: vget_lane supports more variants than this! 
*/ +VAR10 (GETLANE, vget_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (SETLANE, vset_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), +VAR10 (DUP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (DUPLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), +VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (UNOP, vmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), +VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), +VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), +VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), +VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), +VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), +VAR10 (BINOP, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), +VAR2 (UNOP, vrev16, v8qi, v16qi), +VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), +VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), +VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf), +VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), +VAR10 (SELECT, vbsl, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (RINT, vrintn, v2sf, v4sf), +VAR2 (RINT, vrinta, v2sf, v4sf), +VAR2 (RINT, vrintp, v2sf, v4sf), +VAR2 (RINT, vrintm, v2sf, v4sf), +VAR2 (RINT, vrintz, v2sf, v4sf), +VAR2 (RINT, vrintx, v2sf, v4sf), +VAR1 (VTBL, vtbl1, v8qi), +VAR1 (VTBL, vtbl2, v8qi), +VAR1 (VTBL, vtbl3, v8qi), +VAR1 (VTBL, vtbl4, v8qi), +VAR1 (VTBX, vtbx1, v8qi), +VAR1 (VTBX, vtbx2, v8qi), +VAR1 (VTBX, vtbx3, v8qi), +VAR1 (VTBX, vtbx4, v8qi), +VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), +VAR6 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR10 (LOAD1, vld1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1LANE, vld1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1, vld1_dup, + v8qi, v4hi, v2si, v2sf, 
di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1, vst1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1LANE, vst1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR9 (LOADSTRUCT, + vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst2, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, + vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst3, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, vld4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR10 (LOGICBINOP, vand, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorr, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (BINOP, veor, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vbic, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorn, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h new file mode 100644 index 000000000..bc223f8e3 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/bpabi.h @@ -0,0 +1,163 @@ +/* Configuration file for ARM BPABI targets. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Use the AAPCS ABI by default. */ +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS + +/* Assume that AAPCS ABIs should adhere to the full BPABI. */ +#define TARGET_BPABI (TARGET_AAPCS_BASED) + +/* BPABI targets use EABI frame unwinding tables. */ +#undef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 1 + +/* Section 4.1 of the AAPCS requires the use of VFP format. */ +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#else +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +/* EABI targets should enable interworking by default. 
*/ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_INTERWORK | TARGET_ENDIAN_DEFAULT) + +/* The ARM BPABI functions return a boolean; they use no special + calling convention. */ +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) TARGET_BPABI + +/* The BPABI integer comparison routines return { -1, 0, 1 }. */ +#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI + +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + +#if TARGET_BIG_ENDIAN_DEFAULT +#define BE8_LINK_SPEC \ + " %{!mlittle-endian:%{march=armv7-a|mcpu=cortex-a5 \ + |mcpu=cortex-a7 \ + |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |mcpu=cortex-a12 \ + |mcpu=cortex-a15.cortex-a7 \ + |mcpu=marvell-pj4 \ + |mcpu=cortex-a53 \ + |mcpu=cortex-a57 \ + |mcpu=cortex-a57.cortex-a53 \ + |mcpu=generic-armv7-a \ + |march=armv7ve \ + |march=armv7-m|mcpu=cortex-m3 \ + |march=armv7e-m|mcpu=cortex-m4 \ + |march=armv6-m|mcpu=cortex-m0 \ + |march=armv8-a \ + :%{!r:--be8}}}" +#else +#define BE8_LINK_SPEC \ + " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \ + |mcpu=cortex-a7 \ + |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |mcpu=cortex-a12 \ + |mcpu=cortex-a15.cortex-a7 \ + |mcpu=cortex-a53 \ + |mcpu=cortex-a57 \ + |mcpu=cortex-a57.cortex-a53 \ + |mcpu=marvell-pj4 \ + |mcpu=generic-armv7-a \ + |march=armv7ve \ + |march=armv7-m|mcpu=cortex-m3 \ + |march=armv7e-m|mcpu=cortex-m4 \ + |march=armv6-m|mcpu=cortex-m0 \ + |march=armv8-a \ + :%{!r:--be8}}}" +#endif + +/* Tell the assembler to build BPABI binaries. */ +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC + +#ifndef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC "" +#endif + +/* Split out the EABI common values so other targets can use it. */ +#define EABI_LINK_SPEC \ + TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC + +/* The generic link spec in elf.h does not support shared libraries. */ +#define BPABI_LINK_SPEC \ + "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ + "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ + "-X" SUBTARGET_EXTRA_LINK_SPEC EABI_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC BPABI_LINK_SPEC + +/* The BPABI requires that we always use an out-of-line implementation + of RTTI comparison, even if the target supports weak symbols, + because the same object file might be used on a target that does + not support merging symbols across DLL boundaries. This macro is + broken out separately so that it can be used within + TARGET_OS_CPP_BUILTINS in configuration files for systems based on + the BPABI. */ +#define TARGET_BPABI_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \ + } \ + while (false) + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + TARGET_BPABI_CPP_BUILTINS() + +/* The BPABI specifies the use of .{init,fini}_array. Therefore, we + do not want GCC to put anything into the .{init,fini} sections. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP ARM_EABI_CTORS_SECTION_OP +#define FINI_ARRAY_SECTION_ASM_OP ARM_EABI_DTORS_SECTION_OP + +/* The legacy _mcount implementation assumes r11 points to a + 4-word APCS frame. This is generally not true for EABI targets, + particularly not in Thumb mode. We assume the mcount + implementation does not require a counter variable (No Counter). + Note that __gnu_mcount_nc will be entered with a misaligned stack. 
+ This is OK because it uses a special calling convention anyway. */ + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tpush\t{lr}\n"); \ + fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \ +} + +#undef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 + +/* __gnu_mcount_nc restores the original LR value before returning. Ensure + that there is no unnecessary hook set up. */ +#undef PROFILE_HOOK diff --git a/gcc-4.9/gcc/config/arm/coff.h b/gcc-4.9/gcc/config/arm/coff.h new file mode 100644 index 000000000..7deb23898 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/coff.h @@ -0,0 +1,82 @@ +/* Definitions of target machine for GNU compiler. + For ARM with COFF object format. + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Doug Evans (devans@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Note - it is important that this definition matches the one in tcoff.h. */ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + + +/* Run-time Target Specification. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork" } +#endif + +/* This is COFF, but prefer stabs. */ +#define SDB_DEBUGGING_INFO 1 + +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + + +#define TARGET_ASM_FILE_START_APP_OFF true + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section + +/* Support the ctors/dtors and other sections. */ + +#undef INIT_SECTION_ASM_OP + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata" +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"x\"" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"x\"" + +/* Support the ctors/dtors sections for g++. */ + +/* __CTOR_LIST__ and __DTOR_LIST__ must be defined by the linker script. */ +#define CTOR_LISTS_DEFINED_EXTERNALLY + +#undef DO_GLOBAL_CTORS_BODY +#undef DO_GLOBAL_DTORS_BODY + +/* The ARM development system defines __main. 
*/ +#define NAME__MAIN "__gccmain" +#define SYMBOL__MAIN __gccmain + +#define SUPPORTS_INIT_PRIORITY 0 diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md new file mode 100644 index 000000000..85dd116ce --- /dev/null +++ b/gcc-4.9/gcc/config/arm/constraints.md @@ -0,0 +1,438 @@ +;; Constraint definitions for ARM and Thumb +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The following register constraints have been used: +;; - in ARM/Thumb-2 state: t, w, x, y, z +;; - in Thumb state: h, b +;; - in both states: l, c, k, q, US +;; In ARM state, 'l' is an alias for 'r' +;; 'f' and 'v' were previously used for FPA and MAVERICK registers. + +;; The following normal constraints have been used: +;; in ARM/Thumb-2 state: G, I, j, J, K, L, M +;; in Thumb-1 state: I, J, K, L, M, N, O +;; 'H' was previously used for FPA. + +;; The following multi-letter normal constraints have been used: +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Do, Dv, Dy, Di, Dt, Dp, Dz +;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe +;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + +;; The following memory constraints have been used: +;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM state: Uq +;; in Thumb state: Uu, Uw + + +(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS" + "The VFP registers @code{s0}-@code{s31}.") + +(define_register_constraint "w" + "TARGET_32BIT ? (TARGET_VFPD32 ? VFP_REGS : VFP_LO_REGS) : NO_REGS" + "The VFP registers @code{d0}-@code{d15}, or @code{d0}-@code{d31} for VFPv3.") + +(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS" + "The VFP registers @code{d0}-@code{d7}.") + +(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS" + "The Intel iWMMX co-processor registers.") + +(define_register_constraint "z" + "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS" + "The Intel iWMMX GR registers.") + +(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS" + "In Thumb state the core registers @code{r0}-@code{r7}.") + +(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS" + "In Thumb state the core registers @code{r8}-@code{r15}.") + +(define_constraint "j" + "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" + (and (match_test "TARGET_32BIT && arm_arch_thumb2") + (ior (match_code "high") + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + +(define_constraint "Pj" + "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)" + (and (match_code "const_int") + (and (match_test "TARGET_THUMB2") + (match_test "(ival & 0xfffff000) == 0")))) + +(define_constraint "PJ" + "@internal A constant that satisfies the Pj constrant if negated." 
+ (and (match_code "const_int") + (and (match_test "TARGET_THUMB2") + (match_test "((-ival) & 0xfffff000) == 0")))) + +(define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS" + "@internal In ARM state with LDRD support, core registers, otherwise general registers.") + +(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS" + "@internal + Thumb only. The union of the low registers and the stack register.") + +(define_register_constraint "c" "CC_REG" + "@internal The condition code register.") + +(define_register_constraint "Cs" "CALLER_SAVE_REGS" + "@internal The caller save registers. Useful for sibcalls.") + +(define_constraint "I" + "In ARM/Thumb-2 state a constant that can be used as an immediate value in a + Data Processing instruction. In Thumb-1 state a constant in the range + 0-255." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (ival) + : ival >= 0 && ival <= 255"))) + +(define_constraint "J" + "In ARM/Thumb-2 state a constant in the range @minus{}4095-4095. In Thumb-1 + state a constant in the range @minus{}255-@minus{}1." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? (ival >= -4095 && ival <= 4095) + : (ival >= -255 && ival <= -1)"))) + +(define_constraint "K" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + inverted. In Thumb-1 state a constant that satisfies the @code{I} + constraint multiplied by any power of 2." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (~ival) + : thumb_shiftable_const (ival)"))) + +(define_constraint "L" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + negated. In Thumb-1 state a constant in the range @minus{}7-7." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (-ival) + : (ival >= -7 && ival <= 7)"))) + +;; The ARM state version is internal... +;; @internal In ARM/Thumb-2 state a constant in the range 0-32 or any +;; power of 2. +(define_constraint "M" + "In Thumb-1 state a constant that is a multiple of 4 in the range 0-1020." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? ((ival >= 0 && ival <= 32) + || (((ival & (ival - 1)) & 0xFFFFFFFF) == 0)) + : ival >= 0 && ival <= 1020 && (ival & 3) == 0"))) + +(define_constraint "N" + "Thumb-1 state a constant in the range 0-31." + (and (match_code "const_int") + (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)"))) + +(define_constraint "O" + "In Thumb-1 state a constant that is a multiple of 4 in the range + @minus{}508-508." 
+ (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508 + && ((ival & 3) == 0)"))) + +(define_constraint "Pa" + "@internal In Thumb-1 state a constant in the range -510 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pb" + "@internal In Thumb-1 state a constant in the range -262 to +262" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pc" + "@internal In Thumb-1 state a constant that is in the range 1021 to 1275" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 + && ival > 1020 && ival <= 1275"))) + +(define_constraint "Pd" + "@internal In Thumb state a constant in the range 0 to 7" + (and (match_code "const_int") + (match_test "TARGET_THUMB && ival >= 0 && ival <= 7"))) + +(define_constraint "Pe" + "@internal In Thumb-1 state a constant in the range 256 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510"))) + +(define_constraint "Ps" + "@internal In Thumb-2 state a constant in the range -255 to +255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 255"))) + +(define_constraint "Pt" + "@internal In Thumb-2 state a constant in the range -7 to +7" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= 7"))) + +(define_constraint "Pu" + "@internal In Thumb-2 state a constant in the range +1 to +8" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 8"))) + +(define_constraint "Pv" + "@internal In Thumb-2 state a constant in the range -255 to 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0"))) + +(define_constraint "Pw" + "@internal In Thumb-2 state a constant in the range -255 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1"))) + +(define_constraint "Px" + "@internal In Thumb-2 state a constant in the range -7 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) + +(define_constraint "Py" + "@internal In Thumb-2 state a constant in the range 0 to 255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255"))) + +(define_constraint "Pz" + "@internal In Thumb-2 state the constant 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && (ival == 0)"))) + +(define_constraint "G" + "In ARM/Thumb-2 state the floating-point constant 0." + (and (match_code "const_double") + (match_test "TARGET_32BIT && arm_const_double_rtx (op)"))) + +(define_constraint "Dz" + "@internal + In ARM/Thumb-2 state a vector of constant zeros." + (and (match_code "const_vector") + (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + +(define_constraint "Da" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with two Data Processing insns." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 2"))) + +(define_constraint "Db" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with three Data Processing insns." 
+ (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 3"))) + +(define_constraint "Dc" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with four Data Processing insns. This pattern is disabled + if optimizing for space or when we have load-delay slots to fill." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4 + && !(optimize_size || arm_ld_sched)"))) + +(define_constraint "Dd" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn adddi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)"))) + +(define_constraint "De" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn anddi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) + +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + +(define_constraint "Di" + "@internal + In ARM/Thumb-2 state a const_int or const_double where both the high + and low SImode words can be generated as immediates in 32-bit instructions." + (and (match_code "const_double,const_int") + (match_test "TARGET_32BIT && arm_const_double_by_immediates (op)"))) + +(define_constraint "Dn" + "@internal + In ARM/Thumb-2 state a const_vector or const_int which can be loaded with a + Neon vmov immediate instruction." + (and (match_code "const_vector,const_int") + (match_test "TARGET_32BIT + && imm_for_neon_mov_operand (op, GET_MODE (op))"))) + +(define_constraint "Dl" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorr or + vbic instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "DL" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorn or + vand instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_inv_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "Do" + "@internal + In ARM/Thumb2 state valid offset for an ldrd/strd instruction." + (and (match_code "const_int") + (match_test "TARGET_LDRD && offset_ok_for_ldrd_strd (ival)"))) + +(define_constraint "Dv" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts + instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)"))) + +(define_constraint "Dy" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd + instruction." 
+ (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + +(define_constraint "Dt" + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) + +(define_constraint "Dp" + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_bits (op)"))) + +(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") + +(define_memory_constraint "Ua" + "@internal + An address valid for loading/storing register exclusive" + (match_operand 0 "mem_noofs_operand")) + +(define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure + types wider than TImode." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_struct_mem_operand (op)"))) + +(define_memory_constraint "Uv" + "@internal + In ARM/Thumb-2 state a valid VFP load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)"))) + +(define_memory_constraint "Uy" + "@internal + In ARM/Thumb-2 state a valid iWMMX load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)"))) + +(define_memory_constraint "Un" + "@internal + In ARM/Thumb-2 state a valid address for Neon doubleword vector + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0, true)"))) + +(define_memory_constraint "Um" + "@internal + In ARM/Thumb-2 state a valid address for Neon element and structure + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_memory_constraint "Us" + "@internal + In ARM/Thumb-2 state a valid address for non-offset loads/stores of + quad-word values in four ARM registers." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)"))) + +(define_memory_constraint "Uq" + "@internal + In ARM state an address valid in ldrsb instructions." + (and (match_code "mem") + (match_test "TARGET_ARM + && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), + SIGN_EXTEND, 0)"))) + +(define_memory_constraint "Q" + "@internal + In ARM/Thumb-2 state an address that is a single base register." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + +(define_memory_constraint "Uu" + "@internal + In Thumb state an address that is valid in 16bit encoding." + (and (match_code "mem") + (match_test "TARGET_THUMB + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + 0)"))) + +; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p +; are actually LDM/STM instructions, so cannot be used to access unaligned +; data. +(define_memory_constraint "Uw" + "@internal + In Thumb state an address that is valid in 16bit encoding, and that can be + used for unaligned accesses." + (and (match_code "mem") + (match_test "TARGET_THUMB + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + 0) + && GET_CODE (XEXP (op, 0)) != POST_INC"))) + +(define_constraint "US" + "@internal + US is a symbol reference." 
+ (match_code "symbol_ref") +) + +;; We used to have constraint letters for S and R in ARM state, but +;; all uses of these now appear to have been removed. + +;; Additionally, we used to have a Q constraint in Thumb state, but +;; this wasn't really a valid memory constraint. Again, all uses of +;; this now seem to have been removed. + diff --git a/gcc-4.9/gcc/config/arm/cortex-a15-neon.md b/gcc-4.9/gcc/config/arm/cortex-a15-neon.md new file mode 100644 index 000000000..02d4a530b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a15-neon.md @@ -0,0 +1,677 @@ +;; ARM Cortex-A15 NEON pipeline description +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a15_neon_type" + "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q, + neon_arith_basic, neon_arith_complex, + neon_reduc_add_acc, neon_multiply, neon_multiply_q, + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long, + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,\ + neon_shift_imm_complex, + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex, + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith, + neon_fp_arith_q, neon_fp_cvt_int, + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul, + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte, + neon_fp_recpe_rsqrte_q, neon_bitops, neon_bitops_q, neon_from_gp, + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp, + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e, + neon_load_f, neon_store_a, neon_store_b, neon_store_c, neon_store_d, + neon_store_e, neon_store_f, neon_store_g, neon_store_h, + unknown" + (cond [ + (eq_attr "type" "neon_abd, neon_abd_long") + (const_string "neon_abd") + (eq_attr "type" "neon_abd_q") + (const_string "neon_abd_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\ + neon_reduc_add_acc_q") + (const_string "neon_arith_acc") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_arith_acc_q") + (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ + neon_add_widen, neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long, neon_sub, neon_sub_q,\ + neon_sub_long, neon_sub_widen, neon_logic,\ + neon_logic_q, neon_tst, neon_tst_q") + (const_string "neon_arith_basic") + (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_sub_halve, neon_sub_halve_q, neon_qabs,\ + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\ + neon_qneg_q, neon_qsub, neon_qsub_q,\ + neon_sub_halve_narrow_q,\ + neon_compare, neon_compare_q,\ + neon_compare_zero, neon_compare_zero_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_arith_complex") + + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\ + neon_mul_h_scalar, neon_mul_s_scalar,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_s, 
neon_sat_mul_h_scalar,\ + neon_sat_mul_s_scalar,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_mul_s_long,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\ + neon_sat_mul_b_long, neon_sat_mul_h_long,\ + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string "neon_multiply") + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\ + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q,\ + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar_q") + (const_string "neon_multiply_q") + (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\ + neon_mla_h_scalar, neon_mla_s_scalar,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long") + (const_string "neon_mla") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\ + neon_mla_h_scalar_q, neon_mla_s_scalar_q") + (const_string "neon_mla_q") + (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string "neon_sat_mla_long") + + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_shift_acc") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_narrow_q, neon_shift_imm_long") + (const_string "neon_shift_imm_basic") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q") + (const_string "neon_shift_imm_complex") + (eq_attr "type" "neon_shift_reg") + (const_string "neon_shift_reg_basic") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_reg_basic_q") + (eq_attr "type" "neon_sat_shift_reg") + (const_string "neon_shift_reg_complex") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_shift_reg_complex_q") + + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\ + neon_fp_abs_s, neon_fp_abs_s_q") + (const_string "neon_fp_negabs") + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\ + neon_fp_reduc_add_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_minmax_s_q,\ + neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_arith") + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\ + neon_fp_reduc_add_s_q, neon_fp_compare_s_q") + (const_string "neon_fp_arith_q") + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_cvt_int") + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_cvt_int_q") + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h") + (const_string "neon_fp_cvt16") + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar") + (const_string "neon_fp_mul") + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q") + (const_string "neon_fp_mul_q") + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar") + (const_string "neon_fp_mla") + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q") + (const_string "neon_fp_mla_q") + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s") + (const_string "neon_fp_recpe_rsqrte") + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q") + (const_string "neon_fp_recpe_rsqrte_q") + + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\ + neon_rev, neon_permute,\ + neon_tbl1, neon_tbl2, neon_zip,\ + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\ + neon_move, neon_move_q, neon_move_narrow_q") + (const_string "neon_bitops") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\ + neon_rev_q, neon_permute_q") + (const_string "neon_bitops_q") + (eq_attr "type" "neon_from_gp") + 
(const_string "neon_from_gp") + (eq_attr "type" "neon_from_gp_q") + (const_string "neon_from_gp_q") + (eq_attr "type" "neon_tbl3, neon_tbl4") + (const_string "neon_tbl3_tbl4") + (eq_attr "type" "neon_zip_q") + (const_string "neon_zip_q") + (eq_attr "type" "neon_to_gp, neon_to_gp_q") + (const_string "neon_to_gp") + + (eq_attr "type" "f_loads, f_loadd,\ + neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q") + (const_string "neon_load_a") + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_load_b") + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_2reg, neon_load2_2reg_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string "neon_load_c") + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\ + neon_load3_3reg, neon_load3_3reg_q,\ + neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_load_d") + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q") + (const_string "neon_load_e") + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_load_f") + + (eq_attr "type" "f_stores, f_stored,\ + neon_store1_1reg, neon_store1_1reg_q") + (const_string "neon_store_a") + (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q") + (const_string "neon_store_b") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q") + (const_string "neon_store_c") + (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_store_d") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_store_e") + (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_store_f") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_store_g") + (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q") + (const_string "neon_store_h")] + (const_string "unknown"))) + +(define_automaton "cortex_a15_neon") + +;; Dispatch unit. +(define_cpu_unit "ca15_cx_ij, ca15_cx_ik" "cortex_a15_neon") + +;; Accumulate. +(define_cpu_unit "ca15_cx_acc" "cortex_a15_neon") + +;; The 32x32 integer multiply-accumulate pipeline. +(define_cpu_unit "ca15_cx_imac1" "cortex_a15_neon") +(define_reservation "ca15_cx_imac" "(ca15_cx_ij+ca15_cx_imac1)") + + +;; The 64-bit ALU pipeline. +(define_cpu_unit "ca15_cx_ialu1, ca15_cx_ialu2" "cortex_a15_neon") + +;; IALU with accumulate. +(define_reservation "ca15_cx_ialu_with_acc" "ca15_cx_ik+ca15_cx_ialu2+ca15_cx_acc") + +(define_reservation "ca15_cx_ialu" + "((ca15_cx_ij+ca15_cx_ialu1)|(ca15_cx_ik+ca15_cx_ialu2))") + +;; Integer shift pipeline. +(define_cpu_unit "ca15_cx_ishf" "cortex_a15_neon") +(define_reservation "ca15_cx_ishf_with_acc" "ca15_cx_ik+ca15_cx_ishf+ca15_cx_acc") + +;; SIMD multiply pipeline. 
+(define_cpu_unit "ca15_cx_fmul1, ca15_cx_fmul2, ca15_cx_fmul3, ca15_cx_fmul4" + "cortex_a15_neon") + +(define_reservation "ca15_cx_fmul" + "(ca15_cx_ij+(ca15_cx_fmul1|ca15_cx_fmul2))|\ + (ca15_cx_ik+(ca15_cx_fmul3|ca15_cx_fmul4))") + +(define_reservation "ca15_cx_fmul_2" + "(ca15_cx_ij+(ca15_cx_fmul1|ca15_cx_fmul2))+\ + (ca15_cx_ik+(ca15_cx_fmul3|ca15_cx_fmul4))") + +;; SIMD ALU pipeline. +(define_cpu_unit "ca15_cx_falu1, ca15_cx_falu2, ca15_cx_falu3, ca15_cx_falu4" + "cortex_a15_neon") + +(define_reservation "ca15_cx_falu" + "(ca15_cx_ij+(ca15_cx_falu1|ca15_cx_falu2))|\ + (ca15_cx_ik+(ca15_cx_falu3|ca15_cx_falu4))") + +(define_reservation "ca15_cx_falu_2" + "(ca15_cx_ij+(ca15_cx_falu1|ca15_cx_falu2))+\ + (ca15_cx_ik+(ca15_cx_falu3|ca15_cx_falu4))") + +;; SIMD multiply-accumulate pipeline. +;; This can be used if fmul and falu are not reserved. +(define_reservation "ca15_cx_fmac" + "((ca15_cx_ij+ca15_cx_fmul1),nothing*2,ca15_cx_falu1)|\ + ((ca15_cx_ij+ca15_cx_fmul2),nothing*2,ca15_cx_falu2)|\ + ((ca15_cx_ik+ca15_cx_fmul3),nothing*2,ca15_cx_falu3)|\ + ((ca15_cx_ik+ca15_cx_fmul4),nothing*2,ca15_cx_falu4)") + +(define_reservation "ca15_cx_fmac_2" + "(((ca15_cx_ij+ca15_cx_fmul1),nothing*2,ca15_cx_falu1)|\ + ((ca15_cx_ij+ca15_cx_fmul2),nothing*2,ca15_cx_falu2))+\ + (((ca15_cx_ik+ca15_cx_fmul3),nothing*2,ca15_cx_falu3)|\ + ((ca15_cx_ik+ca15_cx_fmul4),nothing*2,ca15_cx_falu4))") + + +;; Vector FP multiply pipeline +(define_cpu_unit "ca15_cx_vfp_i" "cortex_a15_neon") + +(define_reservation "ca15_cx_vfp" "ca15_cx_ik+ca15_cx_vfp_i") + +;; Load permute pipeline +(define_reservation "ca15_cx_perm" "ca15_cx_ij|ca15_cx_ik") +(define_reservation "ca15_cx_perm_2" "ca15_cx_ij+ca15_cx_ik") + +;; Integer Arithmetic Instructions. + +(define_insn_reservation "cortex_a15_neon_abd" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_abd")) + "ca15_issue1,ca15_cx_ialu") + +(define_insn_reservation "cortex_a15_neon_abd_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_abd_q")) + "ca15_issue2,ca15_cx_ialu*2") + +(define_insn_reservation "cortex_a15_neon_aba" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_acc")) + "ca15_issue1,ca15_cx_ialu_with_acc") + +(define_insn_reservation "cortex_a15_neon_aba_q" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_acc_q")) + "ca15_issue2,ca15_cx_ialu_with_acc*2") + +(define_insn_reservation "cortex_a15_neon_arith_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_basic")) + "ca15_issue1,ca15_cx_ialu") + +(define_insn_reservation "cortex_a15_neon_arith_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_complex")) + "ca15_issue1,ca15_cx_ialu") + +;; Integer Multiply Instructions. 
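For orientation (an added sketch, not part of the checkin), the reservations that follow model NEON integer multiply and multiply-accumulate forms; a widening multiply-accumulate kernel such as the one below is typical of the source that ends up in the neon_multiply / neon_mla classes defined above.

#include <arm_neon.h>
#include <stdint.h>

/* Sum of products of 16-bit elements.  N is assumed to be a multiple of
   four to keep the sketch short; the loop body is dominated by VMLAL.S16,
   a widening multiply-accumulate.  */
static int32_t
dot_s16 (const int16_t *a, const int16_t *b, int n)
{
  int32x4_t acc = vdupq_n_s32 (0);
  int32x2_t sum;
  int i;

  for (i = 0; i < n; i += 4)
    acc = vmlal_s16 (acc, vld1_s16 (a + i), vld1_s16 (b + i));

  sum = vadd_s32 (vget_low_s32 (acc), vget_high_s32 (acc));
  sum = vpadd_s32 (sum, sum);
  return vget_lane_s32 (sum, 0);
}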
+ +(define_insn_reservation "cortex_a15_neon_multiply" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_multiply")) + "ca15_issue1,ca15_cx_imac") + +(define_insn_reservation "cortex_a15_neon_multiply_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_multiply_q")) + "ca15_issue2,ca15_cx_imac*2") + +(define_insn_reservation "cortex_a15_neon_mla" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_mla")) + "ca15_issue1,ca15_cx_imac") + +(define_insn_reservation "cortex_a15_neon_mla_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_mla_q")) + "ca15_issue1,ca15_cx_imac*2") + +(define_insn_reservation "cortex_a15_neon_sat_mla_long" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_sat_mla_long")) + "ca15_issue1,ca15_cx_imac") + +;; Integer Shift Instructions. + +(define_insn_reservation + "cortex_a15_neon_shift_acc" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_acc")) + "ca15_issue1,ca15_cx_ishf_with_acc") + +(define_insn_reservation + "cortex_a15_neon_shift_imm_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_imm_basic")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_imm_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_imm_complex")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_basic")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_basic_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_basic_q")) + "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf*2)") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_complex")) + "ca15_issue2,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_complex_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_complex_q")) + "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") + +;; Floating Point Instructions. 
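For orientation (an added sketch, not part of the checkin), the reservations that follow cover vector single-precision arithmetic such as the multiply-accumulate below; note that in this model the accumulating form carries a longer result latency than a plain multiply.

#include <arm_neon.h>

/* acc + x * scale on four single-precision lanes; this typically maps to a
   quad-register VMLA.F32, the kind of instruction the neon_fp_mla classes
   describe.  */
static float32x4_t
scale_add (float32x4_t acc, float32x4_t x, float32x4_t scale)
{
  return vmlaq_f32 (acc, x, scale);
}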
+ +(define_insn_reservation + "cortex_a15_neon_fp_negabs" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_negabs")) + "ca15_issue1,ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_arith" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_arith")) + "ca15_issue1,ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_arith_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_arith_q")) + "ca15_issue2,ca15_cx_falu_2") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt_int" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt_int")) + "ca15_issue1,ca15_cx_falu+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt_int_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt_int_q")) + "ca15_issue2,(ca15_cx_falu+ca15_cx_ishf)*2") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt16" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt16")) + "ca15_issue3,(ca15_cx_falu+ca15_cx_ishf)*2+ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_mul" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mul")) + "ca15_issue1,ca15_cx_fmul") + +(define_insn_reservation + "cortex_a15_neon_fp_mul_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mul_q")) + "ca15_issue2,ca15_cx_fmul_2") + +(define_insn_reservation + "cortex_a15_neon_fp_mla" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mla")) + "ca15_issue1,ca15_cx_fmul") + +(define_insn_reservation + "cortex_a15_neon_fp_mla_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mla_q")) + "ca15_issue2,ca15_cx_fmul_2") + +(define_insn_reservation + "cortex_a15_neon_fp_recps_rsqrte" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_recpe_rsqrte")) + "ca15_issue1,ca15_cx_fmac") + +(define_insn_reservation + "cortex_a15_neon_fp_recps_rsqrte_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_recpe_rsqrte_q")) + "ca15_issue2,ca15_cx_fmac_2") + +;; Miscelaaneous Instructions. 
+ +(define_insn_reservation + "cortex_a15_neon_bitops" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_bitops")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_bitops_q" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_bitops_q")) + "ca15_issue2,ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_from_gp" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_from_gp")) + "ca15_issue2,ca15_ls1+ca15_ls2+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_from_gp_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_from_gp_q")) + "ca15_issue2,ca15_ls1+ca15_ls2+ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_tbl3_tbl4" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_tbl3_tbl4")) + "ca15_issue2,ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_zip_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_zip_q")) + "ca15_issue3,ca15_cx_perm*3") + +(define_insn_reservation + "cortex_a15_neon_to_gp" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_to_gp")) + "ca15_issue2,ca15_ls1+ca15_ls2") + +;; Load Instructions. + +(define_insn_reservation + "cortex_a15_neon_load_a" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_a")) + "ca15_issue1,ca15_ls,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_b" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_b")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_c" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_c")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_d" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_d")) + "ca15_issue1,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +(define_insn_reservation + "cortex_a15_neon_load_e" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_e")) + "ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +(define_insn_reservation + "cortex_a15_neon_load_f" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_f")) + "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +;; Store Instructions. 
+ +(define_insn_reservation + "cortex_a15_neon_store_a" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_a")) + "ca15_issue1,ca15_ls1+ca15_ls2,ca15_str") + +(define_insn_reservation + "cortex_a15_neon_store_b" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_b")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str*2") + +(define_insn_reservation + "cortex_a15_neon_store_c" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_c")) + "ca15_issue3,ca15_ls1+ca15_ls2,ca15_str*3") + +(define_insn_reservation + "cortex_a15_neon_store_d" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_d")) + "ca15_issue3,ca15_issue1,ca15_ls1+ca15_ls2,ca15_str*4") + +(define_insn_reservation + "cortex_a15_neon_store_e" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_e")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_store_f" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_f")) + "ca15_issue3,ca15_ls1+ca15_ls2,ca15_str*2+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_store_g" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_g")) + "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") + +(define_insn_reservation + "cortex_a15_neon_store_h" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_h")) + "ca15_issue3,ca15_issue2+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") + +;; VFP Operations. + +(define_insn_reservation "cortex_a15_vfp_const" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fconsts,fconstd")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_adds_subs" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fadds")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_addd_subd" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "faddd")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_muls" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmuls")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_muld" 12 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmuld")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_macs" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmacs,ffmas")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_macd" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmacd,ffmad")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_cvt" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_cmpd" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fcmpd")) + "ca15_issue2,ca15_cx_perm,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_cmps" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fcmps")) + "ca15_issue2,ca15_cx_perm,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_arithd" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "ffarithd")) + "ca15_issue2,ca15_cx_perm*2") + +(define_insn_reservation "cortex_a15_vfp_cpys" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmov")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_to_from_gp" 5 + (and (eq_attr "tune" 
"cortexa15") + (eq_attr "type" "f_mcr, f_mcrr, f_mrc, f_mrrc")) + "ca15_issue1,ca15_ls1+ca15_ls2") + +(define_insn_reservation "cortex_a15_vfp_ariths" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "ffariths")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_divs" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fdivs, fsqrts")) + "ca15_issue1,ca15_cx_ik") + +(define_insn_reservation "cortex_a15_vfp_divd" 18 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fdivd, fsqrtd")) + "ca15_issue1,ca15_cx_ik") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a15.md b/gcc-4.9/gcc/config/arm/cortex-a15.md new file mode 100644 index 000000000..b3f126a72 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a15.md @@ -0,0 +1,186 @@ +;; ARM Cortex-A15 pipeline description +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; Written by Matthew Gretton-Dann + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a15") + +;; The Cortex-A15 core is modelled as a triple issue pipeline that has +;; the following dispatch units. +;; 1. Two pipelines for simple integer operations: SX1, SX2 +;; 2. Individual units for Neon and FP operations as in cortex-a15-neon.md +;; 3. One pipeline for branch operations: BX +;; 4. One pipeline for integer multiply and divide operations: MX +;; 5. Two pipelines for load and store operations: LS1, LS2 +;; +;; We can issue into three pipelines per-cycle. +;; +;; We assume that where we have unit pairs xx1 is always filled before xx2. 
+ +;; The three issue units +(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") + +(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") +(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") +(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") +(final_presence_set "ca15_i1" "ca15_i0") +(final_presence_set "ca15_i2" "ca15_i1") + +;; The main dispatch units +(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") +(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") +(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") + +(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") + +;; The extended load-store pipeline +(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") + +;; The extended ALU pipeline +(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") +(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") + +;; Simple Execution Unit: +;; +;; Simple ALU without shift +(define_insn_reservation "cortex_a15_alu" 2 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") + +;; ALU ops with immediate shift +(define_insn_reservation "cortex_a15_alu_shift" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + mov_shift,mvn_shift")) + "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ + |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") + +;; ALU ops with register controlled shift +(define_insn_reservation "cortex_a15_alu_shift_reg" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ + |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ + |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") + +;; Multiply Execution Unit: +;; +;; 32-bit multiplies +(define_insn_reservation "cortex_a15_mult32" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "mul32" "yes")) + "ca15_issue1,ca15_mx") + +;; 64-bit multiplies +(define_insn_reservation "cortex_a15_mult64" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "mul64" "yes")) + "ca15_issue1,ca15_mx*2") + +;; Integer divide +(define_insn_reservation "cortex_a15_udiv" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "udiv")) + "ca15_issue1,ca15_mx") + +(define_insn_reservation "cortex_a15_sdiv" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "sdiv")) + "ca15_issue1,ca15_mx") + +;; Block all issue pipes for a cycle +(define_insn_reservation "cortex_a15_block" 1 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "block")) + "ca15_issue3") + +;; Branch execution Unit +;; +;; Branches take one issue slot. +;; No latency as there is no result +(define_insn_reservation "cortex_a15_branch" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "branch")) + "ca15_issue1,ca15_bx") + +;; Load-store execution Unit +;; +;; Loads of up to two words. +(define_insn_reservation "cortex_a15_load1" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "load_byte,load1,load2")) + "ca15_issue1,ca15_ls,ca15_ldr,nothing") + +;; Loads of three or four words. 
+(define_insn_reservation "cortex_a15_load3" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "load3,load4")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") + +;; Stores of up to two words. +(define_insn_reservation "cortex_a15_store1" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "store1,store2")) + "ca15_issue1,ca15_ls,ca15_str") + +;; Stores of three or four words. +(define_insn_reservation "cortex_a15_store3" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "store3,store4")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") + +;; We include Neon.md here to ensure that the branch can block the Neon units. +(include "../arm/cortex-a15-neon.md") + +;; We lie with calls. They take up all issue slots, and form a block in the +;; pipeline. The result however is available the next cycle. +(define_insn_reservation "cortex_a15_call" 1 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "call")) + "ca15_issue3,\ + ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx_ij+ca15_cx_ik+ca15_ls1+ca15_ls2+\ + ca15_cx_imac1+ca15_cx_ialu1+ca15_cx_ialu2+ca15_cx_ishf+\ + ca15_cx_acc+ca15_cx_fmul1+ca15_cx_fmul2+ca15_cx_fmul3+ca15_cx_fmul4+\ + ca15_cx_falu1+ca15_cx_falu2+ca15_cx_falu3+ca15_cx_falu4+ca15_cx_vfp_i,\ + ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+\ + ca15_sx2_shf+ca15_sx2_sat+ca15_ldr+ca15_str") + +;; Simple execution unit bypasses +(define_bypass 1 "cortex_a15_alu" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_load1,cortex_a15_load3") diff --git a/gcc-4.9/gcc/config/arm/cortex-a5.md b/gcc-4.9/gcc/config/arm/cortex-a5.md new file mode 100644 index 000000000..eed098ef9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a5.md @@ -0,0 +1,311 @@ +;; ARM Cortex-A5 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The integer (ALU) pipeline. There are five DPU pipeline +;; stages. However the decode/issue stages operate the same for all +;; instructions, so do not model them. We only need to model the +;; first execute stage because instructions always advance one stage +;; per cycle in order. Only branch instructions may dual-issue, so a +;; single unit covers all of the LS, ALU, MAC and FPU pipelines. 
+ +(define_cpu_unit "cortex_a5_ex1" "cortex_a5") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a5_branch" "cortex_a5") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a5_fpmul_pipe" "cortex_a5") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a5_fpadd_pipe" "cortex_a5") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a5_fp_div_sqrt" "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_alu" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_alu_shift" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "cortex_a5_ex1") + +;; Forwarding path for unshifted operands. + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu") + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses). + +(define_insn_reservation "cortex_a5_mul" 2 + (and (eq_attr "tune" "cortexa5") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage, which is one stage behind +;; the ex1 stage (the first stage we care about for scheduling purposes). The +;; dc1 stage is parallel with ex1, dc2 with ex2 and rot with wr. 
+ +(define_insn_reservation "cortex_a5_load1" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load_byte,load1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store1" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load2" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store2" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load3" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store3" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load4" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store4" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Direct branches are the only instructions we can dual-issue (also IT and +;; nop, but those aren't very interesting for scheduling). (The latency here +;; is meant to represent when the branch actually takes place, but may not be +;; entirely correct.) + +(define_insn_reservation "cortex_a5_branch" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "branch,call")) + "cortex_a5_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_fpalu" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a5_fconst" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a5_fpmul_pipe). + +(define_insn_reservation "cortex_a5_fpmuls" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuls")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued +;; whilst the multiply is in F4. The multiply result can then be forwarded +;; from F5 to F1. The issue unit is only used once (when we first start +;; processing the instruction), but the usage of the FP add pipeline could +;; block other instructions attempting to use it simultaneously. We try to +;; avoid that using cortex_a5_fpadd_pipe. 
+ +(define_insn_reservation "cortex_a5_fpmacs" 8 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;; Non-multiply instructions can issue in the middle two instructions of a +;; double-precision multiply. Note that it isn't entirely clear when a branch +;; can dual-issue when a multi-cycle multiplication is in progress; we ignore +;; that for now though. + +(define_insn_reservation "cortex_a5_fpmuld" 7 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuld")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe") + +(define_insn_reservation "cortex_a5_fpmacd" 11 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacd,ffmad")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? Not sure if the 14 cycles taken for single-precision divide to complete +;; includes the time taken for the special instruction used to collect the +;; result to travel down the multiply pipeline, or not. Assuming so. (If +;; that's wrong, the latency should be increased by a few cycles.) + +;; fsqrt takes one cycle less, but that is not modelled, nor is the use of the +;; multiply pipeline to collect the divide/square-root result. + +(define_insn_reservation "cortex_a5_fdivs" 14 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13") + +;; ??? Similarly for fdivd. + +(define_insn_reservation "cortex_a5_fdivd" 29 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; FP loads take data from wr/rot/f3. + +;; Core-to-VFP transfers use the multiply pipeline. + +(define_insn_reservation "cortex_a5_r2f" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f2r" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? The flag forwarding from fmstat to the ex2 stage of the second +;; instruction is not modeled at present. + +(define_insn_reservation "cortex_a5_f_flags" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_flag")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_f_loads" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loads")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_loadd" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loadd")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stores" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stores")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stored" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stored")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a5_f_loads" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") + +(define_bypass 3 "cortex_a5_f_loadd" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") diff --git a/gcc-4.9/gcc/config/arm/cortex-a53.md b/gcc-4.9/gcc/config/arm/cortex-a53.md new file mode 100644 index 000000000..deae8eba5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a53.md @@ -0,0 +1,309 @@ +;; ARM Cortex-A53 pipeline description +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two main integer execution pipelines, described as +;; slot 0 and issue slot 1. + +(define_cpu_unit "cortex_a53_slot0" "cortex_a53") +(define_cpu_unit "cortex_a53_slot1" "cortex_a53") + +(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1") +(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1") + +;; The load/store pipeline. Load/store instructions can dual-issue from +;; either pipeline, but two load/stores cannot simultaneously issue. + +(define_cpu_unit "cortex_a53_ls" "cortex_a53") + +;; The store pipeline. Shared between both execution pipelines. + +(define_cpu_unit "cortex_a53_store" "cortex_a53") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a53_branch" "cortex_a53") + +;; The integer divider. + +(define_cpu_unit "cortex_a53_idiv" "cortex_a53") + +;; The floating-point add pipeline used to model the usage +;; of the add pipeline by fmac instructions. + +(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53") + +;; Floating-point div/sqrt (long latency, out-of-order completion). 
+ +(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_alu" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,csel,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "cortex_a53_slot_any") + +(define_insn_reservation "cortex_a53_alu_shift" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "cortex_a53_slot_any") + +;; Forwarding path for unshifted operands. + +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu") + +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results so there's no need to specify +;; bypasses. Multiplies can only single-issue currently. + +(define_insn_reservation "cortex_a53_mul" 3 + (and (eq_attr "tune" "cortexa53") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a53_single_issue") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. + +(define_bypass 1 "cortex_a53_mul" + "cortex_a53_mul" + "arm_mac_accumulator_is_mul_result") + +;; Punt with a high enough latency for divides. +(define_insn_reservation "cortex_a53_udiv" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "udiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7") + +(define_insn_reservation "cortex_a53_sdiv" 9 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "sdiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8") + + +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. 
+ +(define_insn_reservation "cortex_a53_load1" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load_byte,load1,load_acq")) + "cortex_a53_slot_any+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store1" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store1,store_rel")) + "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load2" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load2")) + "cortex_a53_single_issue+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store2" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store2")) + "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load3plus" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load3,load4")) + "(cortex_a53_single_issue+cortex_a53_ls)*2") + +(define_insn_reservation "cortex_a53_store3plus" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store3,store4")) + "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2") + +;; Load/store addresses are required early in Issue. +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_load*" + "arm_early_load_addr_dep") +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store*" + "arm_early_store_addr_dep") + +;; Load data can forward in the ALU pipeline +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; ALU ops can forward to stores. +(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Currently models all branches as dual-issuable from either execution +;; slot, which isn't true for all cases. We still need to model indirect +;; branches. + +(define_insn_reservation "cortex_a53_branch" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "branch,call")) + "cortex_a53_slot_any+cortex_a53_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_fpalu" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd, fcsel")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fconst" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fpmul" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmuls,fmuld")) + "cortex_a53_slot0") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued after +;; the multiply completes. Model that accordingly. 
+ +(define_insn_reservation "cortex_a53_fpmac" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmacs,fmacd,ffmas,ffmad")) + "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; fsqrt really takes one cycle less, but that is not modelled. + +(define_insn_reservation "cortex_a53_fdivs" 14 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13") + +(define_insn_reservation "cortex_a53_fdivd" 29 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_r2f" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f2r" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_flags" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_flag")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_loads" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loads")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_loadd" 5 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loadd")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stores" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stores")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stored" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stored")) + "cortex_a53_slot0") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a53_f_loads" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + +(define_bypass 2 "cortex_a53_f_loadd" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a7.md b/gcc-4.9/gcc/config/arm/cortex-a7.md new file mode 100644 index 000000000..8291d7fa9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a7.md @@ -0,0 +1,394 @@ +;; ARM Cortex-A7 pipeline description +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; Based on cortex-a5.md which was originally contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a7_neon_type" + "neon_mul, neon_mla, neon_other" + (cond [ + (eq_attr "type" "neon_mul_b, neon_mul_b_q,\ + neon_mul_h, neon_mul_h_q,\ + neon_mul_s, neon_mul_s_q,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_mul_s_long, neon_mul_h_scalar,\ + neon_mul_h_scalar_q, neon_mul_s_scalar,\ + neon_mul_s_scalar_q, neon_mul_h_scalar_long,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_b, neon_sat_mul_b_q,\ + neon_sat_mul_h, neon_sat_mul_h_q,\ + neon_sat_mul_s, neon_sat_mul_s_q,\ + neon_sat_mul_b_long, neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\ + neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ + neon_fp_mul_s, neon_fp_mul_s_q,\ + neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q") + (const_string "neon_mul") + (eq_attr "type" "neon_mla_b, neon_mla_b_q, neon_mla_h,\ + neon_mla_h_q, neon_mla_s, neon_mla_s_q,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar, neon_mla_h_scalar_q,\ + neon_mla_s_scalar, neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ + neon_fp_mla_s, neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q") + (const_string "neon_mla")] + (const_string "neon_other"))) + +(define_automaton "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The Cortex-A7 pipeline integer and vfp pipeline. +;; The decode is the same for all instructions, so do not model it. +;; We only model the first execution stage because +;; instructions always advance one stage per cycle in order. +;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together. + +(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7") + +(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2") + +(define_cpu_unit "cortex_a7_branch" "cortex_a7") + +;; Cortex-A7 is in order and can dual-issue under limited circumstances. +;; ex2 can be reserved only after ex1 is reserved. + +(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A direct branch can dual issue either as younger or older instruction, +;; but branches cannot dual issue with branches. +;; No latency as there is no result. 
+ +(define_insn_reservation "cortex_a7_branch" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "branch")) + "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") + +;; Call cannot dual-issue as an older instruction. It can dual-issue +;; as a younger instruction, or single-issue. Call cannot dual-issue +;; with another branch instruction. The result is available the next +;; cycle. +(define_insn_reservation "cortex_a7_call" 1 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "call")) + "(cortex_a7_ex2|cortex_a7_both)+cortex_a7_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instruction with an immediate operand can dual-issue. +(define_insn_reservation "cortex_a7_alu_imm" 2 + (and (eq_attr "tune" "cortexa7") + (ior (eq_attr "type" "adr,alu_imm,alus_imm,logic_imm,logics_imm,\ + mov_imm,mvn_imm,extend") + (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") + (not (eq_attr "length" "8"))))) + "cortex_a7_ex2|cortex_a7_ex1") + +;; ALU instruction with register operands can dual-issue +;; with a younger immediate-based instruction. +(define_insn_reservation "cortex_a7_alu_reg" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + bfm,rev,\ + shift_imm,shift_reg,mov_reg,mvn_reg")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_alu_shift" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "cortex_a7_ex1") + +;; Forwarding path for unshifted operands. +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_store*" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses. +;; Multiply instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_mul" 2 + (and (eq_attr "tune" "cortexa7") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a7_both") + +;; Forward the result of a multiply operation to the accumulator +;; of the following multiply and accumulate instruction. +(define_bypass 1 "cortex_a7_mul" + "cortex_a7_mul" + "arm_mac_accumulator_is_result") + +;; The latency depends on the operands, so we use an estimate here. +(define_insn_reservation "cortex_a7_idiv" 5 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "udiv,sdiv")) + "cortex_a7_both*5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. +;; Double-word accesses can be issued in a single cycle, +;; and occupy only one pipeline stage. 
+ +(define_insn_reservation "cortex_a7_load1" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load_byte,load1")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store1" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store1")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load2" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load2")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_store2" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store2")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_load3" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load3")) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store3" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store4")) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load4" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load4")) + "cortex_a7_both, cortex_a7_both") + +(define_insn_reservation "cortex_a7_store4" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store3")) + "cortex_a7_both, cortex_a7_both") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Neon integer, neon floating point, and single-precision floating +;; point instructions of the same type have the same timing +;; characteristics, but neon instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_fpalu" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\ + f_cvt, f_cvtf2i, f_cvti2f, fcmps, fcmpd")) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a7_fconst" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a7_fpmul_pipe). + +(define_insn_reservation "cortex_a7_fpmuls" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmuls")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_neon_mul" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "cortex_a7_neon_type" "neon_mul")) + "(cortex_a7_both+cortex_a7_fpmul_pipe)*2") + +(define_insn_reservation "cortex_a7_fpmacs" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_neon_mla" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "cortex_a7_neon_type" "neon_mla")) + "cortex_a7_both+cortex_a7_fpmul_pipe") + +(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla" + "cortex_a7_fpmacs,cortex_a7_neon_mla" + "arm_mac_accumulator_is_result") + +;; Non-multiply instructions can issue between two cycles of a +;; double-precision multiply. 
+ +(define_insn_reservation "cortex_a7_fpmuld" 7 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmuld")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") + +(define_insn_reservation "cortex_a7_fpmacd" 11 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmacd")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") + +(define_insn_reservation "cortex_a7_fpfmad" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "ffmad")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") + +(define_bypass 7 "cortex_a7_fpmacd" + "cortex_a7_fpmacd,cortex_a7_fpfmad" + "arm_mac_accumulator_is_result") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fdivs" 16 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") + +(define_insn_reservation "cortex_a7_fdivd" 31 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Core-to-VFP transfers. + +(define_insn_reservation "cortex_a7_r2f" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f2r" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Fixme: The flag forwarding from fmstat to the second instruction is +;; not modeled at present. + +(define_insn_reservation "cortex_a7_f_flags" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_flag")) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_f_loads" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_loads")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_loadd" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_loadd")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f_stores" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_stores")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_stored" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_stored")) + "cortex_a7_both") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" + "cortex_a7_fpalu,\ + cortex_a7_fpmuls,cortex_a7_fpmacs,\ + cortex_a7_fpmuld,cortex_a7_fpmacd, cortex_a7_fpfmad,\ + cortex_a7_fdivs, cortex_a7_fdivd,\ + cortex_a7_f2r") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEON +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Simple modeling for all neon instructions not covered earlier.
+ +(define_insn_reservation "cortex_a7_neon" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "is_neon_type" "yes") + (eq_attr "cortex_a7_neon_type" "neon_other"))) + "cortex_a7_both*2") diff --git a/gcc-4.9/gcc/config/arm/cortex-a8-neon.md b/gcc-4.9/gcc/config/arm/cortex-a8-neon.md new file mode 100644 index 000000000..1bb0ab237 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a8-neon.md @@ -0,0 +1,1534 @@ +;; ARM Cortex-A8 NEON scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a8_neon_type" + "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs, + neon_bit_ops_q, + neon_vaba,neon_vaba_qqq, neon_vmov, + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32, + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar, + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16, + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long, + neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar, + neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, + neon_shift_1,neon_shift_2,neon_shift_3, + neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd, + neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd, + neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar, + neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd, + neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle, + neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs, + neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4, + neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs, + neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane, + neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane, + neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc, + neon_ldm_2,neon_stm_2,none,unknown" + (cond [ + (eq_attr "type" "neon_logic, neon_logic_q,\ + neon_bsl, neon_cls, neon_cnt,\ + neon_add, neon_add_q") + (const_string "neon_int_1") + (eq_attr "type" "neon_add_widen, neon_sub_widen,\ + neon_sub, neon_sub_q") + (const_string "neon_int_2") + (eq_attr "type" "neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_add_long, neon_sub_long") + (const_string "neon_int_3") + (eq_attr "type" "neon_abs, neon_abs_q, + neon_compare_zero, neon_compare_zero_q,\ + neon_add_halve_narrow_q,\ + neon_sub_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_qadd, neon_qadd_q,\ + neon_tst, neon_tst_q") + (const_string "neon_int_4") + (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\ + neon_qsub, neon_qsub_q,\ + neon_abd, neon_abd_q,\ + neon_compare, neon_compare_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_int_5") + (eq_attr "type" "neon_qneg, neon_qneg_q, 
neon_qabs, neon_qabs_q") + (const_string "neon_vqneg_vqabs") + (eq_attr "type" "neon_move, neon_move_q") + (const_string "neon_vmov") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q") + (const_string "neon_bit_ops_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc") + (const_string "neon_vaba") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_vaba_qqq") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_long, neon_shift_imm_narrow_q,\ + neon_shift_reg") + (const_string "neon_shift_1") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg") + (const_string "neon_shift_2") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_3") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_vqshl_vrshl_vqrshl_qqq") + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_vsra_vrsra") + (eq_attr "type" "neon_mul_b, neon_mul_h,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_b_long, neon_sat_mul_h_long") + (const_string + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q") + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (eq_attr "type" "neon_mul_s, neon_mul_s_long,\ + neon_sat_mul_s, neon_sat_mul_s_long,\ + neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\ + neon_mul_s_scalar, neon_sat_mul_s_scalar,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (eq_attr "type" "neon_mla_b, neon_mla_h,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_h_scalar_long") + (const_string + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q") + (const_string "neon_mla_qqq_8_16") + (eq_attr "type" "neon_mla_s, neon_mla_s_long,\ + neon_sat_mla_s_long,\ + neon_mla_h_scalar_q, neon_mla_s_scalar,\ + neon_mla_s_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q") + (const_string "neon_mla_qqq_32_qqd_32_scalar") + (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\ + neon_mul_h_scalar_long,\ + neon_sat_mul_h_scalar_long") + (const_string + "neon_mul_ddd_16_scalar_32_16_long_scalar") + (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\ + neon_mul_s_scalar_q") + (const_string "neon_mul_qqd_32_scalar") + (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long") + (const_string + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\ + neon_fp_addsub_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_mul_s,\ + neon_fp_recpe_s, neon_fp_rsqrte_s,\ + neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_vadd_ddd_vabs_dd") + (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\ + neon_fp_neg_s_q,\ + neon_fp_addsub_s_q, neon_fp_compare_s_q,\ + neon_fp_minmax_s_q, neon_fp_mul_s_q,\ + neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\ + neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_vadd_qqq_vabs_qq") + (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\ + neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_vsum") + (eq_attr "type" "neon_fp_mul_s_scalar") + (const_string "neon_fp_vmul_ddd") + (eq_attr "type" "neon_fp_mul_s_scalar_q") + 
(const_string "neon_fp_vmul_qqd") + (eq_attr "type" "neon_fp_mla_s") + (const_string "neon_fp_vmla_ddd") + (eq_attr "type" "neon_fp_mla_s_q") + (const_string "neon_fp_vmla_qqq") + (eq_attr "type" "neon_fp_mla_s_scalar") + (const_string "neon_fp_vmla_ddd_scalar") + (eq_attr "type" "neon_fp_mla_s_scalar_q") + (const_string "neon_fp_vmla_qqq_scalar") + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s") + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q") + (const_string "neon_fp_vrecps_vrsqrts_qqq") + (eq_attr "type" "neon_move_narrow_q, neon_dup,\ + neon_dup_q, neon_permute, neon_zip,\ + neon_ext, neon_rev, neon_rev_q") + (const_string "neon_bp_simple") + (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2") + (const_string "neon_bp_2cycle") + (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4") + (const_string "neon_bp_3cycle") + (eq_attr "type" "neon_ldr") + (const_string "neon_ldr") + (eq_attr "type" "neon_str") + (const_string "neon_str") + (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q,\ + neon_load2_2reg, neon_load2_2reg_q") + (const_string "neon_vld1_1_2_regs") + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_vld1_3_4_regs") + (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string + "neon_vld2_2_regs_vld1_vld2_all_lanes") + (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q,\ + neon_load2_4reg, neon_load2_4reg_q") + (const_string "neon_vld2_4_regs") + (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_vld3_vld4") + (eq_attr "type" "f_loads, f_loadd, f_stores, f_stored,\ + neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_one_lane, neon_load2_one_lane_q") + (const_string "neon_vld1_vld2_lane") + (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_vld3_vld4_lane") + (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\ + neon_store1_2reg, neon_store1_2reg_q,\ + neon_store2_2reg, neon_store2_2reg_q") + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_vst1_3_4_regs") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store3_3reg, neon_store3_3reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_vst2_4_regs_vst3_vst4") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_vst1_vst2_lane") + (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_vst3_vst4_lane") + (eq_attr "type" "neon_from_gp, f_mcr") + (const_string "neon_mcr") + (eq_attr "type" "neon_from_gp_q, f_mcrr") + (const_string "neon_mcr_2_mcrr") + (eq_attr "type" "neon_to_gp, f_mrc") + (const_string "neon_mrc") + (eq_attr "type" "neon_to_gp_q, f_mrrc") + (const_string "neon_mrrc")] + (const_string "unknown"))) + +(define_automaton "cortex_a8_neon") + +;; Only one load, store, permute, MCR or MRC instruction can be issued +;; per cycle. 
+(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon") + +;; The VFPLite unit (non-pipelined). +(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a8_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon") + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline, and not as they +;; are issued into the ARM pipeline. Dual issue may not occur except +;; upon the first and last cycles of a multi-cycle instruction, but it +;; is unclear whether two multi-cycle instructions can issue together (in +;; this model they cannot). It is also unclear whether a pair of +;; a multi-cycle and single-cycle instructions, that could potentially +;; issue together, only do so if (say) the single-cycle one precedes +;; the other. + +(define_reservation "cortex_a8_neon_dp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_4" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp") + +(define_reservation "cortex_a8_neon_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd") + +(define_reservation "cortex_a8_neon_perm" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_2" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_3" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_ls" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_2" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_3" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_4" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_5" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + 
cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_fmul_then_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + nothing*3,\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fmul_then_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp,\ + nothing*2,\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_fadd") + +;; VFP instructions can only be single-issued into the NEON pipeline. +(define_reservation "cortex_a8_vfp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_perm+cortex_a8_vfplite") + +;; VFP instructions. +;; The VFPLite unit that executes these isn't pipelined; we give the +;; worst-case latencies (and choose the double-precision ones where we +;; do not distinguish on precision). We assume RunFast mode is not +;; enabled and therefore do not model the possible VFP instruction +;; execution in the NEON floating point pipelines, nor additional +;; latencies for the processing of subnormals. +;; +;; TODO: RunFast mode could potentially be enabled when -ffast-math +;; is specified. + +(define_insn_reservation "cortex_a8_vfp_add_sub" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fconsts,fconstd,fadds,faddd")) + "cortex_a8_vfp,cortex_a8_vfplite*9") + +(define_insn_reservation "cortex_a8_vfp_muls" 12 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuls")) + "cortex_a8_vfp,cortex_a8_vfplite*11") + +(define_insn_reservation "cortex_a8_vfp_muld" 17 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuld")) + "cortex_a8_vfp,cortex_a8_vfplite*16") + +(define_insn_reservation "cortex_a8_vfp_macs" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a8_vfp,cortex_a8_vfplite*20") + +(define_insn_reservation "cortex_a8_vfp_macd" 26 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacd,ffmad")) + "cortex_a8_vfp,cortex_a8_vfplite*25") + +(define_insn_reservation "cortex_a8_vfp_divs" 37 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a8_vfp,cortex_a8_vfplite*36") + +(define_insn_reservation "cortex_a8_vfp_divd" 65 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a8_vfp,cortex_a8_vfplite*64") + +;; Comparisons can actually take 7 cycles sometimes instead of four, +;; but given all the other instructions lumped into type=ffarith that +;; take four cycles, we pick that latency. +(define_insn_reservation "cortex_a8_vfp_farith" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) + "cortex_a8_vfp,cortex_a8_vfplite*3") + +(define_insn_reservation "cortex_a8_vfp_cvt" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_a8_vfp,cortex_a8_vfplite*6") + +;; NEON -> core transfers. + +(define_insn_reservation "cortex_a8_neon_mrc" 20 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mrc")) + "cortex_a8_neon_ls") + +(define_insn_reservation "cortex_a8_neon_mrrc" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mrrc")) + "cortex_a8_neon_ls_2") + +;; Arithmetic Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a8_neon_int_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_3" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_3")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_4" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_4")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_5" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_5")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vqneg_vqabs")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a8_neon_vmov" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vaba" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vaba")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vaba_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a8_neon_bit_ops_q" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bit_ops_q")) + "cortex_a8_neon_dp_2") + +;; Integer Multiply/Accumulate Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_8_16")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Shift Operations + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a8_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a8_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_3")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vsra_vrsra")) + "cortex_a8_neon_dp") + +;; Floating point Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a8_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vsum")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_qqd")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Permute operations. + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a8_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_simple")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_2cycle")) + "cortex_a8_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_3cycle")) + "cortex_a8_neon_perm_3") + +;; Load Operations. + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a8_neon_ldr" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_ldr")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_str" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_str")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_1_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld2_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4")) + "cortex_a8_neon_ls_4") + +;; Store operations. + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_vld2_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4_lane")) + "cortex_a8_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_vst2_lane")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst3_vst4_lane")) + "cortex_a8_neon_ls_3") + +;; Register Transfer Operations + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mcr")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mcr_2_mcrr")) + "cortex_a8_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_mcr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vld3_vld4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld2_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + 
cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_ldr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bp_3cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_bp_2cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_bp_simple" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" + 
"cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + 
cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vsum" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vsra_vrsra" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_3" + "cortex_a8_neon_int_1,\ + 
cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_shift_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + 
cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + 
cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_vaba_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vaba" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bit_ops_q" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vqneg_vqabs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_5" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a8.md b/gcc-4.9/gcc/config/arm/cortex-a8.md new file mode 100644 index 000000000..b272472e0 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a8.md @@ -0,0 +1,279 @@ +;; ARM Cortex-A8 scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a8") + +;; Only one load/store instruction can be issued per cycle +;; (although reservation of this unit is only required for single +;; loads and stores -- see below). +(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8") + +;; Only one branch instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8") + +;; The two ALU pipelines. +(define_cpu_unit "cortex_a8_alu0" "cortex_a8") +(define_cpu_unit "cortex_a8_alu1" "cortex_a8") + +;; The usual flow of an instruction through the pipelines. +(define_reservation "cortex_a8_default" + "cortex_a8_alu0|cortex_a8_alu1") + +;; The flow of a branch instruction through the pipelines. +(define_reservation "cortex_a8_branch" + "(cortex_a8_alu0+cortex_a8_issue_branch)|\ + (cortex_a8_alu1+cortex_a8_issue_branch)") + +;; The flow of a load or store instruction through the pipeline in +;; the case where that instruction consists of only one micro-op... +(define_reservation "cortex_a8_load_store_1" + "(cortex_a8_alu0+cortex_a8_issue_ls)|\ + (cortex_a8_alu1+cortex_a8_issue_ls)") + +;; ...and in the case of two micro-ops. 
Dual issue is altogether forbidden +;; during the issue cycle of the first micro-op. (Instead of modelling +;; a separate issue unit, we instead reserve alu0 and alu1 to +;; prevent any other instructions from being issued upon that first cycle.) +;; Even though the load/store pipeline is usually available in either +;; ALU pipe, multi-cycle instructions always issue in pipeline 0. +(define_reservation "cortex_a8_load_store_2" + "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\ + cortex_a8_alu0+cortex_a8_issue_ls") + +;; The flow of a single-cycle multiplication. +(define_reservation "cortex_a8_multiply" + "cortex_a8_alu0") + +;; The flow of a multiplication instruction that gets decomposed into +;; two micro-ops. The two micro-ops will be issued to pipeline 0 on +;; successive cycles. Dual issue cannot happen at the same time as the +;; first of the micro-ops. +(define_reservation "cortex_a8_multiply_2" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; Similarly, the flow of a multiplication instruction that gets +;; decomposed into three micro-ops. Dual issue cannot occur except on +;; the cycle upon which the third micro-op is issued. +(define_reservation "cortex_a8_multiply_3" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; The model given here assumes that all instructions are unconditional. + +;; Data processing instructions, but not move instructions. + +;; We include CLZ with these since it has the same execution pattern +;; (source read in E2 and destination available at the end of that cycle). +(define_insn_reservation "cortex_a8_alu" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,\ + shift_imm,shift_reg,\ + multiple,no_insn")) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend")) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift_reg" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) + "cortex_a8_default") + +;; Move instructions. + +(define_insn_reservation "cortex_a8_mov" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs")) + "cortex_a8_default") + +;; Exceptions to the default latencies for data processing instructions. + +;; A move followed by an ALU instruction with no early dep. +;; (Such a pair can be issued in parallel, hence latency zero.) +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Multiplication instructions. 
These are categorized according to their +;; reservation behavior and the need below to distinguish certain +;; varieties for bypasses. Results are available at the E5 stage +;; (but some of these are multi-cycle instructions which explains the +;; latencies below). + +(define_insn_reservation "cortex_a8_mul" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mul,smulxy,smmul")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mla" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mull" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy")) + "cortex_a8_multiply_3") + +(define_insn_reservation "cortex_a8_smulwy" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smulwy,smuad,smusd")) + "cortex_a8_multiply") + +;; smlald and smlsld are multiply-accumulate instructions but do not +;; received bypassed data from other multiplication results; thus, they +;; cannot go in cortex_a8_mla above. (See below for bypass details.) +(define_insn_reservation "cortex_a8_smlald" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smlald,smlsld")) + "cortex_a8_multiply_2") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. +(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy" + "cortex_a8_mla" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at E2 has lower latency than one needing it at E1. +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Load instructions. +;; The presence of any register writeback is ignored here. + +;; A load result has latency 3 unless the dependent instruction has +;; no early dep, in which case it is only latency two. +;; We assume 64-bit alignment for doubleword loads. +(define_insn_reservation "cortex_a8_load1_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load1,load2,load_byte")) + "cortex_a8_load_store_1") + +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; We do not currently model the fact that loads with scaled register +;; offsets that are not LSL #2 have an extra cycle latency (they issue +;; as two micro-ops). + +;; A load multiple of three registers is usually issued as two micro-ops. +;; The first register will be available at E3 of the first iteration, +;; the second at E3 of the second iteration, and the third at E4 of +;; the second iteration. A load multiple of four registers is usually +;; issued as two micro-ops. 
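A quick illustration of how the cortex_a8_load3_4 reservation and the define_bypass entries just below interact: the producer's declared latency (5) applies by default, but when the consumer's reservation appears in a bypass list and the bypass's optional guard predicate accepts the dependency, the shorter bypass latency (4) is used instead. A minimal sketch of that lookup, assuming the standard define_bypass semantics; the Python helper and tables are hypothetical, and the guard predicate itself is implemented elsewhere in the ARM backend:

    # Hypothetical sketch, not GCC internals: choosing the dependency cost
    # between a producer and a consumer reservation.  A bypass overrides the
    # default latency only if its guard (if any) holds for the dependency.
    DEFAULT_LATENCY = {"cortex_a8_load3_4": 5}
    BYPASSES = {
        ("cortex_a8_load3_4", "cortex_a8_alu"): (4, None),
        ("cortex_a8_load3_4", "cortex_a8_alu_shift"): (4, "arm_no_early_alu_shift_dep"),
    }

    def dep_latency(producer, consumer, guards_true=frozenset()):
        entry = BYPASSES.get((producer, consumer))
        if entry and (entry[1] is None or entry[1] in guards_true):
            return entry[0]              # bypass applies: result forwarded a cycle early
        return DEFAULT_LATENCY[producer]

    assert dep_latency("cortex_a8_load3_4", "cortex_a8_alu") == 4
    # Early-dependence case: the guard does not hold, so the default latency stands.
    assert dep_latency("cortex_a8_load3_4", "cortex_a8_alu_shift") == 5
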
+(define_insn_reservation "cortex_a8_load3_4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load3,load4")) + "cortex_a8_load_store_2") + +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Store instructions. +;; Writeback is again ignored. + +(define_insn_reservation "cortex_a8_store1_2" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store1,store2")) + "cortex_a8_load_store_1") + +(define_insn_reservation "cortex_a8_store3_4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store3,store4")) + "cortex_a8_load_store_2") + +;; An ALU instruction acting as a producer for a store instruction +;; that only uses the result as the value to be stored (as opposed to +;; using it to calculate the address) has latency zero; the store +;; reads the value to be stored at the start of E3 and the ALU insn +;; writes it at the end of E2. Move instructions actually produce the +;; result at the end of E1, but since we don't have delay slots, the +;; scheduling behavior will be the same. +(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\ + cortex_a8_alu_shift_reg,cortex_a8_mov" + "cortex_a8_store1_2,cortex_a8_store3_4" + "arm_no_early_store_addr_dep") + +;; Branch instructions + +(define_insn_reservation "cortex_a8_branch" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "branch")) + "cortex_a8_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_a8_call" 32 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "call")) + "cortex_a8_issue_branch") + +;; NEON (including VFP) instructions. + +(include "cortex-a8-neon.md") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a9-neon.md b/gcc-4.9/gcc/config/arm/cortex-a9-neon.md new file mode 100644 index 000000000..3ff93f924 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a9-neon.md @@ -0,0 +1,1471 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; +;; Neon pipeline description contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
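The define_attr that opens this file maps the architecture-neutral "type" attribute onto Cortex-A9 NEON scheduling classes: the cond clauses are tested in order, the first match supplies the class, and anything unmatched falls through to "unknown". A minimal sketch of that first-match behaviour, using abridged versions of the first two mappings from the attribute below; the Python helper is hypothetical:

    # Hypothetical sketch of the cond in the define_attr that follows:
    # a first-match classifier over the generic "type" attribute.
    CLASSES = [
        ({"neon_logic", "neon_add", "neon_add_q"}, "neon_int_1"),
        ({"neon_add_widen", "neon_sub", "neon_sub_q"}, "neon_int_2"),
    ]

    def cortex_a9_neon_type(insn_type):
        for types, klass in CLASSES:
            if insn_type in types:       # first matching clause wins
                return klass
        return "unknown"                 # the attribute's fallback value

    assert cortex_a9_neon_type("neon_add") == "neon_int_1"
    # Core (non-NEON) types are not listed in the cond, so they map to "unknown".
    assert cortex_a9_neon_type("alu_imm") == "unknown"
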
+ +(define_attr "cortex_a9_neon_type" + "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs, + neon_bit_ops_q, + neon_vaba,neon_vaba_qqq, neon_vmov, + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32, + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar, + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16, + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long, + neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar, + neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, + neon_shift_1,neon_shift_2,neon_shift_3, + neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd, + neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd, + neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar, + neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd, + neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle, + neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs, + neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4, + neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs, + neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane, + neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane, + neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc, + neon_ldm_2,neon_stm_2,none,unknown" + (cond [ + (eq_attr "type" "neon_logic, neon_logic_q,\ + neon_bsl, neon_cls, neon_cnt,\ + neon_add, neon_add_q") + (const_string "neon_int_1") + (eq_attr "type" "neon_add_widen, neon_sub_widen,\ + neon_sub, neon_sub_q") + (const_string "neon_int_2") + (eq_attr "type" "neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_add_long, neon_sub_long") + (const_string "neon_int_3") + (eq_attr "type" "neon_abs, neon_abs_q, + neon_compare_zero, neon_compare_zero_q,\ + neon_add_halve_narrow_q,\ + neon_sub_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_qadd, neon_qadd_q,\ + neon_tst, neon_tst_q") + (const_string "neon_int_4") + (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\ + neon_qsub, neon_qsub_q,\ + neon_abd, neon_abd_q,\ + neon_compare, neon_compare_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_int_5") + (eq_attr "type" "neon_qneg, neon_qneg_q, neon_qabs, neon_qabs_q") + (const_string "neon_vqneg_vqabs") + (eq_attr "type" "neon_move, neon_move_q") + (const_string "neon_vmov") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q") + (const_string "neon_bit_ops_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc") + (const_string "neon_vaba") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_vaba_qqq") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_long, neon_shift_imm_narrow_q,\ + neon_shift_reg") + (const_string "neon_shift_1") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg") + (const_string "neon_shift_2") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_3") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_vqshl_vrshl_vqrshl_qqq") + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_vsra_vrsra") + (eq_attr "type" "neon_mul_b, neon_mul_h,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_b_long, neon_sat_mul_h_long") + (const_string + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mul_b_q, 
neon_mul_h_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q") + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (eq_attr "type" "neon_mul_s, neon_mul_s_long,\ + neon_sat_mul_s, neon_sat_mul_s_long,\ + neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\ + neon_mul_s_scalar, neon_sat_mul_s_scalar,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (eq_attr "type" "neon_mla_b, neon_mla_h,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_h_scalar_long") + (const_string + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q") + (const_string "neon_mla_qqq_8_16") + (eq_attr "type" "neon_mla_s, neon_mla_s_long,\ + neon_sat_mla_s_long,\ + neon_mla_h_scalar_q, neon_mla_s_scalar,\ + neon_mla_s_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q") + (const_string "neon_mla_qqq_32_qqd_32_scalar") + (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\ + neon_mul_h_scalar_long,\ + neon_sat_mul_h_scalar_long") + (const_string + "neon_mul_ddd_16_scalar_32_16_long_scalar") + (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\ + neon_mul_s_scalar_q") + (const_string "neon_mul_qqd_32_scalar") + (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long") + (const_string + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\ + neon_fp_addsub_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_mul_s,\ + neon_fp_recpe_s, neon_fp_rsqrte_s,\ + neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_vadd_ddd_vabs_dd") + (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\ + neon_fp_neg_s_q,\ + neon_fp_addsub_s_q, neon_fp_compare_s_q,\ + neon_fp_minmax_s_q, neon_fp_mul_s_q,\ + neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\ + neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_vadd_qqq_vabs_qq") + (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\ + neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_vsum") + (eq_attr "type" "neon_fp_mul_s_scalar") + (const_string "neon_fp_vmul_ddd") + (eq_attr "type" "neon_fp_mul_s_scalar_q") + (const_string "neon_fp_vmul_qqd") + (eq_attr "type" "neon_fp_mla_s") + (const_string "neon_fp_vmla_ddd") + (eq_attr "type" "neon_fp_mla_s_q") + (const_string "neon_fp_vmla_qqq") + (eq_attr "type" "neon_fp_mla_s_scalar") + (const_string "neon_fp_vmla_ddd_scalar") + (eq_attr "type" "neon_fp_mla_s_scalar_q") + (const_string "neon_fp_vmla_qqq_scalar") + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s") + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q") + (const_string "neon_fp_vrecps_vrsqrts_qqq") + (eq_attr "type" "neon_move_narrow_q, neon_dup,\ + neon_dup_q, neon_permute, neon_zip,\ + neon_ext, neon_rev, neon_rev_q") + (const_string "neon_bp_simple") + (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2") + (const_string "neon_bp_2cycle") + (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4") + (const_string "neon_bp_3cycle") + (eq_attr "type" "neon_ldr") + (const_string "neon_ldr") + (eq_attr "type" "neon_str") + (const_string "neon_str") + (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q,\ + neon_load2_2reg, neon_load2_2reg_q") + (const_string "neon_vld1_1_2_regs") + 
(eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_vld1_3_4_regs") + (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string + "neon_vld2_2_regs_vld1_vld2_all_lanes") + (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q,\ + neon_load2_4reg, neon_load2_4reg_q") + (const_string "neon_vld2_4_regs") + (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_vld3_vld4") + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_one_lane, neon_load2_one_lane_q") + (const_string "neon_vld1_vld2_lane") + (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_vld3_vld4_lane") + (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\ + neon_store1_2reg, neon_store1_2reg_q,\ + neon_store2_2reg, neon_store2_2reg_q") + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_vst1_3_4_regs") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store3_3reg, neon_store3_3reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_vst2_4_regs_vst3_vst4") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_vst1_vst2_lane") + (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_vst3_vst4_lane") + (eq_attr "type" "neon_from_gp") + (const_string "neon_mcr") + (eq_attr "type" "neon_from_gp_q") + (const_string "neon_mcr_2_mcrr") + (eq_attr "type" "neon_to_gp") + (const_string "neon_mrc") + (eq_attr "type" "neon_to_gp_q") + (const_string "neon_mrrc")] + (const_string "unknown"))) + +(define_automaton "cortex_a9_neon") + +;; Only one instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a9_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon") +(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon") + + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline. 
+ +(define_reservation "cortex_a9_neon_dp" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_4" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_perm" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_ls" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls") +(define_reservation "cortex_a9_neon_ls_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_4" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_5" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_fmul_then_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + nothing*3,\ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fmul_then_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp,\ + nothing*2,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_fadd") + +;; NEON -> core transfers. +(define_insn_reservation "ca9_neon_mrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +(define_insn_reservation "ca9_neon_mrrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mrrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_2" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a9_neon_int_3" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_3")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_4" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_4")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_5" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_5")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vqneg_vqabs")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a9_neon_vmov" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vaba" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vaba")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vaba_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bit_ops_q" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bit_ops_q")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. 
+(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_qqq_8_16")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a9_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_3")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vsra_vrsra")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a9_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vsum")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmul_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmul_qqd")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a9_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_simple")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_2cycle")) + "cortex_a9_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_3cycle")) + "cortex_a9_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a9_neon_ldr" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_ldr")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_str" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_str")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_1_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld2_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. 
+(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_vld2_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4_lane")) + "cortex_a9_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_vst2_lane")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst3_vst4_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mcr")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mcr_2_mcrr")) + "cortex_a9_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_mcr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vld3_vld4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld2_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ 
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_ldr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bp_3cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_bp_2cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_bp_simple" + "cortex_a9_neon_int_1,\ + 
cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vsum" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vsra_vrsra" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + 
cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_shift_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_vaba_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vaba" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vmov" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bit_ops_q" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vqneg_vqabs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_5" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a9.md b/gcc-4.9/gcc/config/arm/cortex-a9.md new file mode 100644 index 000000000..a888896c5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a9.md @@ -0,0 +1,283 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2008-2014 Free Software Foundation, Inc. +;; Originally written by CodeSourcery for VFP. +;; +;; Rewritten by Ramana Radhakrishnan +;; Integer Pipeline description contributed by ARM Ltd. +;; VFP Pipeline description rewritten and contributed by ARM Ltd. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a9") + +;; The Cortex-A9 core is modelled as a dual issue pipeline that has +;; the following components. +;; 1. 1 Load Store Pipeline. +;; 2. P0 / main pipeline for data processing instructions. +;; 3. P1 / Dual pipeline for Data processing instructions. +;; 4. MAC pipeline for multiply as well as multiply +;; and accumulate instructions. +;; 5. 1 VFP and an optional Neon unit. +;; The Load/Store, VFP and Neon issue pipeline are multiplexed. +;; The P0 / main pipeline and M1 stage of the MAC pipeline are +;; multiplexed. +;; The P1 / dual pipeline and M2 stage of the MAC pipeline are +;; multiplexed. +;; There are only 4 integer register read ports and hence at any point of +;; time we can't have issue down the E1 and the E2 ports unless +;; of course there are bypass paths that get exercised. +;; Both P0 and P1 have 2 stages E1 and E2. +;; Data processing instructions issue to E1 or E2 depending on +;; whether they have an early shift or not. + +(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9") +(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9") +(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9") + +(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb") +(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default") +(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default") + +(define_reservation "cortex_a9_multcycle1" + "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \ +cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") + +(define_reservation "cortex_a9_mult16" + "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac16" + "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mult" + "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac" + "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mult_long" + "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb") + +;; Issue at the same time along the load store pipeline and +;; the VFP / Neon pipeline is not possible. +(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon") + +;; Default data processing instruction without any shift +;; The only exception to this is the mov instruction +;; which can go down E2 without any problem. +(define_insn_reservation "cortex_a9_dp" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ + mrs,multiple,no_insn")) + "cortex_a9_p0_default|cortex_a9_p1_default") + +;; An instruction using the shifter will go down E1. 
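+;; For example, an instruction such as add r0, r1, r2, lsl #2 needs the +;; shifter and so issues to E1, giving it one cycle more result latency +;; than the plain data-processing instructions modelled above.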
+(define_insn_reservation "cortex_a9_dp_shift" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mvn_shift,mvn_shift_reg")) + "cortex_a9_p0_shift | cortex_a9_p1_shift") + +;; Loads have a latency of 4 cycles. +;; We don't model autoincrement instructions. These +;; instructions use the load store pipeline and 1 of +;; the E2 units to write back the result of the increment. + +(define_insn_reservation "cortex_a9_load1_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) + "cortex_a9_ls") + +;; Load multiples and store multiples can't be issued for 2 cycles in a +;; row. The description below assumes that addresses are 64 bit aligned. +;; If not, there is an extra cycle latency which is not modelled. + +(define_insn_reservation "cortex_a9_load3_4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load3, load4")) + "cortex_a9_ls, cortex_a9_ls") + +(define_insn_reservation "cortex_a9_store1_2" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store1, store2, f_stores, f_stored")) + "cortex_a9_ls") + +;; Almost all our store multiples use an auto-increment +;; form. Don't issue back to back load and store multiples +;; because the load store unit will stall. + +(define_insn_reservation "cortex_a9_store3_4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store3, store4")) + "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls") + +;; We get 16*16 multiply / mac results in 3 cycles. +(define_insn_reservation "cortex_a9_mult16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smulxy")) + "cortex_a9_mult16") + +;; The 16*16 mac is slightly different in that it +;; reserves M1 and M2 in the same cycle. +(define_insn_reservation "cortex_a9_mac16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smlaxy")) + "cortex_a9_mac16") + +(define_insn_reservation "cortex_a9_multiply" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "mul,smmul,smmulr")) + "cortex_a9_mult") + +(define_insn_reservation "cortex_a9_mac" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "mla,smmla")) + "cortex_a9_mac") + +(define_insn_reservation "cortex_a9_multiply_long" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + "cortex_a9_mult_long") + +;; An instruction with a result in E2 can be forwarded +;; to E2 or E1 or M1 or the load store unit in the next cycle. + +(define_bypass 1 "cortex_a9_dp" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") + +(define_bypass 2 "cortex_a9_dp_shift" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") + +;; An instruction in the load store pipeline can provide +;; read access to a DP instruction in the P0 default pipeline +;; before the writeback stage. + +(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2") + +(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") + +;; Calls and branches.
+ +;; Branch instructions + +(define_insn_reservation "cortex_a9_branch" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "branch")) + "cortex_a9_branch") + +;; Call latencies are essentially 0 but make sure +;; dual issue doesn't happen i.e the next instruction +;; starts at the next cycle. +(define_insn_reservation "cortex_a9_call" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "call")) + "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon") + + +;; Pipelining for VFP instructions. +;; Issue happens either along load store unit or the VFP / Neon unit. +;; Pipeline Instruction Classification. +;; FPS - fmov, ffariths, ffarithd,f_mcr,f_mcrr,f_mrc,f_mrrc +;; FP_ADD - fadds, faddd, fcmps (1) +;; FPMUL - fmul{s,d}, fmac{s,d}, ffma{s,d} +;; FPDIV - fdiv{s,d} +(define_cpu_unit "ca9fps" "cortex_a9") +(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") +(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9") +(define_cpu_unit "ca9fp_ds1" "cortex_a9") + + +;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. +(define_insn_reservation "cortex_a9_fps" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmov, fconsts, fconstd, ffariths, ffarithd,\ + f_mcr, f_mcrr, f_mrc, f_mrrc, f_flag")) + "ca9_issue_vfp_neon + ca9fps") + +(define_bypass 1 + "cortex_a9_fps" + "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long") + +;; Scheduling on the FP_ADD pipeline. +(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") + +(define_insn_reservation "cortex_a9_fadd" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds, faddd, f_cvt, f_cvtf2i, f_cvti2f")) + "ca9fp_add") + +(define_insn_reservation "cortex_a9_fcmp" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcmps, fcmpd")) + "ca9_issue_vfp_neon + ca9fp_add1") + +;; Scheduling for the Multiply and MAC instructions. +(define_reservation "ca9fmuls" + "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_reservation "ca9fmuld" + "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_insn_reservation "cortex_a9_fmuls" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "ca9fmuls") + +(define_insn_reservation "cortex_a9_fmuld" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "ca9fmuld") + +(define_insn_reservation "cortex_a9_fmacs" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs,ffmas")) + "ca9fmuls, ca9fp_add") + +(define_insn_reservation "cortex_a9_fmacd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd,ffmad")) + "ca9fmuld, ca9fp_add") + +;; Division pipeline description. 
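+;; The divide/square-root unit (ca9fp_ds1) is reserved only on the issue +;; cycle; the remaining cycles are modelled as "nothing", so the results +;; below become available after 15 (single precision) or 25 (double +;; precision) cycles.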
+(define_insn_reservation "cortex_a9_fdivs" 15 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs, fsqrts")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") + +(define_insn_reservation "cortex_a9_fdivd" 25 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd, fsqrtd")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") + +;; Include Neon pipeline description +(include "cortex-a9-neon.md") diff --git a/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md b/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md new file mode 100644 index 000000000..aa81e52ef --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md @@ -0,0 +1,117 @@ +;; ARM Cortex-M4 FPU pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Use two artificial units to model FPU. +(define_cpu_unit "cortex_m4_v_a" "cortex_m4") +(define_cpu_unit "cortex_m4_v_b" "cortex_m4") + +(define_reservation "cortex_m4_v" "cortex_m4_v_a+cortex_m4_v_b") +(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v") +(define_reservation "cortex_m4_exa_va" "cortex_m4_a+cortex_m4_v_a") +(define_reservation "cortex_m4_exb_vb" "cortex_m4_b+cortex_m4_v_b") + +;; Integer instructions following VDIV or VSQRT complete out-of-order. +(define_insn_reservation "cortex_m4_fdivs" 15 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_m4_ex_v,cortex_m4_v*13") + +(define_insn_reservation "cortex_m4_vmov_1" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmov,fconsts")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_vmov_2" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_mrc,f_mrrc,f_mcr,f_mcrr")) + "cortex_m4_ex_v*2") + +(define_insn_reservation "cortex_m4_fmuls" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmuls")) + "cortex_m4_ex_v") + +;; Integer instructions following multiply-accumulate instructions +;; complete out-of-order. 
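+;; This is approximated by keeping only the FPU units (cortex_m4_v) +;; reserved for the trailing cycles, so independent integer instructions +;; can still be issued in parallel.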
+(define_insn_reservation "cortex_m4_fmacs" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmacs,ffmas")) + "cortex_m4_ex_v,cortex_m4_v*2") + +(define_insn_reservation "cortex_m4_ffariths" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "ffariths")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fadds" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fadds")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fcmps" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fcmps")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_flag" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_flag")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_cvt" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_load" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loads")) + "cortex_m4_exa_va,cortex_m4_exb_vb") + +(define_insn_reservation "cortex_m4_f_store" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stores")) + "cortex_m4_exa_va") + +(define_insn_reservation "cortex_m4_f_loadd" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loadd")) + "cortex_m4_ex_v*3") + +(define_insn_reservation "cortex_m4_f_stored" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stored")) + "cortex_m4_ex_v*3") + +;; MAC instructions consume their addend one cycle later. If the result +;; of an arithmetic instruction is consumed as the addend of the following +;; MAC instruction, the latency can be decreased by one. + +(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 3 "cortex_m4_fmacs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 14 "cortex_m4_fdivs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") diff --git a/gcc-4.9/gcc/config/arm/cortex-m4.md b/gcc-4.9/gcc/config/arm/cortex-m4.md new file mode 100644 index 000000000..690ce751f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-m4.md @@ -0,0 +1,128 @@ +;; ARM Cortex-M4 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_m4") + +;; We model the pipelining of LDR instructions by using two artificial units. + +(define_cpu_unit "cortex_m4_a" "cortex_m4") + +(define_cpu_unit "cortex_m4_b" "cortex_m4") + +(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b") + +;; ALU and multiply is one cycle. 
+(define_insn_reservation "cortex_m4_alu" 1 + (and (eq_attr "tune" "cortexm4") + (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) + "cortex_m4_ex") + +;; Byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_load1" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load_byte,load1")) + "cortex_m4_a, cortex_m4_b") + +;; str rx, [ry, #imm] is always one cycle. +(define_insn_reservation "cortex_m4_store1_1" 1 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (match_test "arm_address_offset_is_imm (insn)")) + "cortex_m4_a") + +;; Other byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_store1_2" 2 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (not (match_test "arm_address_offset_is_imm (insn)"))) + "cortex_m4_a*2") + +(define_insn_reservation "cortex_m4_load2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_store2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_load3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_store3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_load4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load4")) + "cortex_m4_ex*5") + +(define_insn_reservation "cortex_m4_store4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store4")) + "cortex_m4_ex*5") + +(define_bypass 1 "cortex_m4_load1" + "cortex_m4_store1_1,cortex_m4_store1_2" + "arm_no_early_store_addr_dep") + +;; If the address of load or store depends on the result of the preceding +;; instruction, the latency is increased by one. + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_load1" + "arm_early_load_addr_dep") + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_store1_1,cortex_m4_store1_2" + "arm_early_store_addr_dep") + +(define_insn_reservation "cortex_m4_branch" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "branch")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_call" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "call")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_block" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "block")) + "cortex_m4_ex") diff --git a/gcc-4.9/gcc/config/arm/cortex-r4.md b/gcc-4.9/gcc/config/arm/cortex-r4.md new file mode 100644 index 000000000..f000124cb --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-r4.md @@ -0,0 +1,299 @@ +;; ARM Cortex-R4 scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_r4") + +;; We approximate the dual-issue constraints of this core using four +;; "issue units" and a reservation matrix as follows. The numbers indicate +;; the instruction groups' preferences in order. Multiple entries for +;; the same numbered preference indicate units that must be reserved +;; together. +;; +;; Issue unit: A B C ALU +;; +;; ALU w/o reg shift 1st 2nd 1st and 2nd +;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd +;; Moves 1st 2nd 2nd +;; Multiplication 1st 1st +;; Division 1st 1st +;; Load/store single 1st 1st +;; Other load/store 1st 1st +;; Branches 1st + +(define_cpu_unit "cortex_r4_issue_a" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_b" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_c" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4") + +(define_reservation "cortex_r4_alu" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_alu)") +(define_reservation "cortex_r4_alu_shift_reg" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_c+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mov" + "cortex_r4_issue_a|(cortex_r4_issue_b+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_mul_2" + "(cortex_r4_issue_a+cortex_r4_issue_alu)*2") +;; Division instructions execute out-of-order with respect to the +;; rest of the pipeline and only require reservations on their first and +;; final cycles. +(define_reservation "cortex_r4_div_9" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*7,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_div_10" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*8,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_load_store" + "cortex_r4_issue_a+cortex_r4_issue_c") +(define_reservation "cortex_r4_load_store_2" + "(cortex_r4_issue_a+cortex_r4_issue_b)*2") +(define_reservation "cortex_r4_branch" "cortex_r4_issue_b") + +;; We assume that all instructions are unconditional. + +;; Data processing instructions. Moves without shifts are kept separate +;; for the purposes of the dual-issue constraints above. 
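+;; A mov without a shift only needs issue unit A, or unit B together with +;; the ALU unit, so it pairs more freely than the ALU operations; see the +;; cortex_r4_mov reservation below.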
+(define_insn_reservation "cortex_r4_alu" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,mvn_imm,mvn_reg")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_mov" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mov_imm,mov_reg")) + "cortex_r4_mov") + +(define_insn_reservation "cortex_r4_alu_shift" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_alu_shift_reg" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "cortex_r4_alu_shift_reg") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; In terms of availabilities, a consumer mov could theoretically be +;; issued together with a producer ALU instruction, without stalls. +;; In practice this cannot happen because mov;add (in that order) is not +;; eligible for dual issue and furthermore dual issue is not permitted +;; when a dependency is involved. We therefore note it as latency one. +;; A mov followed by another of the same is also latency one. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_mov") + +;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are +;; media data processing instructions nor sad instructions. + +;; Multiplication instructions. + +(define_insn_reservation "cortex_r4_mul_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mul,smmul")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mul_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smulxy,smulwy,smuad,smusd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mla_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mla,smmla")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mla_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smlaxy,smlawy,smlad,smlsd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_smlald" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smlald,smlsld")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mull" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smull,umull,umlal,umaal")) + "cortex_r4_mul_2") + +;; A multiply or an MLA with a single-register result, followed by an +;; MLA with an accumulator dependency, has its result forwarded. 
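+;; For example, in mul r0, r1, r2 followed by mla r3, r4, r5, r0 the +;; accumulator r0 is forwarded, so the mla sees a latency of 3 rather than +;; 4 (2 rather than 3 for the 16-bit forms).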
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at ALU has lower latency than one needing it at Shift. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; A multiply followed by a mov has one cycle lower latency again. +(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_mov") +(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_mov") + +;; We guess that division of A/B using sdiv or udiv, on average, +;; is performed with B having ten more leading zeros than A. +;; This gives a latency of nine for udiv and ten for sdiv. +(define_insn_reservation "cortex_r4_udiv" 9 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "udiv")) + "cortex_r4_div_9") + +(define_insn_reservation "cortex_r4_sdiv" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "sdiv")) + "cortex_r4_div_10") + +;; Branches. We assume correct prediction. + +(define_insn_reservation "cortex_r4_branch" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "branch")) + "cortex_r4_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_r4_call" 32 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "call")) + "nothing") + +;; Status register access instructions are not currently emitted. + +;; Load instructions. +;; We do not model the "addr_md_3cycle" cases and assume that +;; accesses following are correctly aligned. + +(define_insn_reservation "cortex_r4_load_1_2" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load1,load2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_load_3_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load3,load4")) + "cortex_r4_load_store_2") + +;; If a producing load is followed by an instruction consuming only +;; as a Normal Reg, there is one fewer cycle of latency. 
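+;; For example, an add consuming the result of a single ldr sees a latency +;; of 2 cycles rather than the load's full latency of 3.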
+ +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; If a producing load is followed by an instruction consuming only +;; as a Late Reg, there are two fewer cycles of latency. Such consumer +;; instructions are moves and stores. + +(define_bypass 1 "cortex_r4_load_1_2" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") +(define_bypass 2 "cortex_r4_load_3_4" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") + +;; If a producer's result is required as the base or offset of a load, +;; there is an extra cycle latency. + +(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\ + cortex_r4_alu_shift_reg" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +;; Store instructions. + +(define_insn_reservation "cortex_r4_store_1_2" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store1,store2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_store_3_4" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store3,store4")) + "cortex_r4_load_store_2") + diff --git a/gcc-4.9/gcc/config/arm/cortex-r4f.md b/gcc-4.9/gcc/config/arm/cortex-r4f.md new file mode 100644 index 000000000..25d949789 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-r4f.md @@ -0,0 +1,161 @@ +;; ARM Cortex-R4F VFP pipeline description +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; With the exception of simple VMOV , instructions and +;; the accumulate operand of a multiply-accumulate instruction, all +;; registers are early registers. Thus base latencies are 1 more than +;; those listed in the TRM. + +;; We use the A, B and C units from the integer core, plus two additional +;; units to enforce VFP dual issue constraints.
+ +;; A B C V1 VMLA +;; fcpy 1 2 +;; farith 1 2 1 +;; fmrc 1 2 +;; fconst 1 2 * * +;; ffarith 1 2 * * +;; fmac 1 2 1 2 +;; fdiv 1 2 * +;; f_loads * * * +;; f_stores * * * + +(define_cpu_unit "cortex_r4_v1" "cortex_r4") + +(define_cpu_unit "cortex_r4_vmla" "cortex_r4") + +(define_reservation "cortex_r4_issue_ab" + "(cortex_r4_issue_a|cortex_r4_issue_b)") +(define_reservation "cortex_r4_single_issue" + "cortex_r4_issue_a+cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fcpys" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmov")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_ffariths" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffariths,fconsts,fcmps")) + "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fariths" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fadds,fmuls")) + "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fmacs" 6 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacs,ffmas")) + "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)") + +(define_insn_reservation "cortex_r4_fdivs" 17 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_floads" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loads")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fstores" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla") + +(define_insn_reservation "cortex_r4_mcr" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_mrc" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_r4_issue_ab") + +;; Bypasses for normal (not early) regs. +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fcpys") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fcpys") +(define_bypass 5 "cortex_r4_fmacs" + "cortex_r4_fcpys") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fcpys") + +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +;; mac->mac has an extra forwarding path. +(define_bypass 3 "cortex_r4_fmacs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") + +;; Double precision operations. These can not dual issue. + +(define_insn_reservation "cortex_r4_fmacd" 20 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacd,ffmad")) + "cortex_r4_single_issue*13") + +(define_insn_reservation "cortex_r4_farith" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "faddd,fmuld")) + "cortex_r4_single_issue*3") + +;; FIXME: The short cycle count suggests these instructions complete +;; out of order. Chances are this is not a pipelined operation. 
+(define_insn_reservation "cortex_r4_fdivd" 97 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_ffarithd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffarithd,fconstd")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_fcmpd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcmpd")) + "cortex_r4_single_issue*2") + +(define_insn_reservation "cortex_r4_f_cvt" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_f_memd" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loadd,f_stored")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_f_flag" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_single_issue") + diff --git a/gcc-4.9/gcc/config/arm/crypto.def b/gcc-4.9/gcc/config/arm/crypto.def new file mode 100644 index 000000000..dc805d9ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/crypto.def @@ -0,0 +1,34 @@ +/* Cryptographic instruction builtin definitions. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi) +CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi) +CRYPTO1 (aesimc, AESIMC, v16uqi, v16uqi) +CRYPTO1 (aesmc, AESMC, v16uqi, v16uqi) +CRYPTO1 (sha1h, SHA1H, v4usi, v4usi) +CRYPTO2 (sha1su1, SHA1SU1, v4usi, v4usi, v4usi) +CRYPTO2 (sha256su0, SHA256SU0, v4usi, v4usi, v4usi) +CRYPTO3 (sha1c, SHA1C, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1m, SHA1M, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1p, SHA1P, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1su0, SHA1SU0, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h, SHA256H, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h2, SHA256H2, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256su1, SHA256SU1, v4usi, v4usi, v4usi, v4usi) +CRYPTO2 (vmullp64, VMULLP64, uti, udi, udi) diff --git a/gcc-4.9/gcc/config/arm/crypto.md b/gcc-4.9/gcc/config/arm/crypto.md new file mode 100644 index 000000000..9f249803d --- /dev/null +++ b/gcc-4.9/gcc/config/arm/crypto.md @@ -0,0 +1,86 @@ +;; ARMv8-A crypto patterns. +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 + "register_operand" "w")] + CRYPTO_UNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q1" + [(set_attr "type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w")] + CRYPTO_BINARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2" + [(set_attr "type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + CRYPTO_TERNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "type" "")] +) + +(define_insn "crypto_sha1h" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (zero_extend:V4SI + (unspec:SI [(vec_select:SI + (match_operand:V4SI 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))] + UNSPEC_SHA1H)))] + "TARGET_CRYPTO" + "sha1h.32\\t%q0, %q1" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "crypto_vmullp64" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_VMULLP64))] + "TARGET_CRYPTO" + "vmull.p64\\t%q0, %P1, %P2" + [(set_attr "type" "neon_mul_d_long")] +) + +(define_insn "crypto_" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (vec_select:SI + (match_operand: 2 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")])) + (match_operand: 3 "register_operand" "w")] + CRYPTO_SELECTING))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "type" "")] +) diff --git a/gcc-4.9/gcc/config/arm/driver-arm.c b/gcc-4.9/gcc/config/arm/driver-arm.c new file mode 100644 index 000000000..6d9c4174c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/driver-arm.c @@ -0,0 +1,151 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "configargs.h" + +struct vendor_cpu { + const char *part_no; + const char *arch_name; + const char *cpu_name; +}; + +static struct vendor_cpu arm_cpu_table[] = { + {"0x926", "armv5te", "arm926ej-s"}, + {"0xa26", "armv5te", "arm1026ej-s"}, + {"0xb02", "armv6k", "mpcore"}, + {"0xb36", "armv6j", "arm1136j-s"}, + {"0xb56", "armv6t2", "arm1156t2-s"}, + {"0xb76", "armv6zk", "arm1176jz-s"}, + {"0xc05", "armv7-a", "cortex-a5"}, + {"0xc07", "armv7ve", "cortex-a7"}, + {"0xc08", "armv7-a", "cortex-a8"}, + {"0xc09", "armv7-a", "cortex-a9"}, + {"0xc0d", "armv7ve", "cortex-a12"}, + {"0xc0f", "armv7ve", "cortex-a15"}, + {"0xc14", "armv7-r", "cortex-r4"}, + {"0xc15", "armv7-r", "cortex-r5"}, + {"0xc20", "armv6-m", "cortex-m0"}, + {"0xc21", "armv6-m", "cortex-m1"}, + {"0xc23", "armv7-m", "cortex-m3"}, + {"0xc24", "armv7e-m", "cortex-m4"}, + {NULL, NULL, NULL} +}; + +static struct { + const char *vendor_no; + const struct vendor_cpu *vendor_parts; +} vendors[] = { + {"0x41", arm_cpu_table}, + {NULL, NULL} +}; + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "arch", "cpu" or "tune" as argument depending on if + -march=native, -mcpu=native or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for + -march=native. If the routine can't detect a known processor, + the -march or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *val = NULL; + char buf[128]; + FILE *f = NULL; + bool arch; + const struct vendor_cpu *cpu_table = NULL; + + if (argc < 1) + goto not_found; + + arch = strcmp (argv[0], "arch") == 0; + if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) + goto not_found; + + f = fopen ("/proc/cpuinfo", "r"); + if (f == NULL) + goto not_found; + + while (fgets (buf, sizeof (buf), f) != NULL) + { + /* Ensure that CPU implementer is ARM (0x41). */ + if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) + { + int i; + for (i = 0; vendors[i].vendor_no != NULL; i++) + if (strstr (buf, vendors[i].vendor_no) != NULL) + { + cpu_table = vendors[i].vendor_parts; + break; + } + } + + /* Detect arch/cpu. */ + if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) + { + int i; + + if (cpu_table == NULL) + goto not_found; + + for (i = 0; cpu_table[i].part_no != NULL; i++) + if (strstr (buf, cpu_table[i].part_no) != NULL) + { + val = arch ? 
cpu_table[i].arch_name : cpu_table[i].cpu_name; + break; + } + break; + } + } + + fclose (f); + + if (val == NULL) + goto not_found; + + return concat ("-m", argv[0], "=", val, NULL); + +not_found: + { + unsigned int i; + unsigned int opt; + const char *search[] = {NULL, "arch"}; + + if (f) + fclose (f); + + search[0] = argv[0]; + for (opt = 0; opt < ARRAY_SIZE (search); opt++) + for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) + if (strcmp (configure_default_options[i].name, search[opt]) == 0) + return concat ("-m", search[opt], "=", + configure_default_options[i].value, NULL); + return NULL; + } +} diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h new file mode 100644 index 000000000..2ac8c8d04 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/elf.h @@ -0,0 +1,159 @@ +/* Definitions of target machine for GNU compiler. + For ARM with ELF obj format. + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell and + Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + SUBSUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +#ifndef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC "\ +%{mapcs-float:-mfloat}" +#endif + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%(asm_cpu_spec) \ +%{mapcs-*:-mapcs-%*} \ +%(subtarget_asm_float_spec) \ +%{mthumb-interwork:-mthumb-interwork} \ +%{mfloat-abi=*} %{mfpu=*} \ +%(subtarget_extra_asm_spec)" +#endif + +/* The ARM uses @ are a comment character so we need to redefine + TYPE_OPERAND_FMT. */ +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +/* We might need a ARM specific header to function declarations. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ARM_OUTPUT_FN_UNWIND (FILE, TRUE); \ + } \ + while (0) + +/* We might need an ARM specific trailer for function declarations. 
*/ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + ARM_OUTPUT_FN_UNWIND (FILE, FALSE); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X" +#endif + +/* Run-time Target Specification. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork", "fno-leading-underscore" } +#endif + +#define TARGET_ASM_FILE_START_APP_OFF true +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + +/* Output an element in the static constructor array. */ +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR arm_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR arm_elf_asm_destructor + +/* For PIC code we need to explicitly specify (PLT) and (GOT) relocs. */ +#define NEED_PLT_RELOC flag_pic +#define NEED_GOT_RELOC flag_pic + +/* The ELF assembler handles GOT addressing differently to NetBSD. */ +#define GOT_PCREL 0 + +/* Align output to a power of two. Note ".align 0" is redundant, + and also GAS will treat it as ".align 2" which we do not want. */ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + if ((POWER) > 0) \ + fprintf (STREAM, "\t.align\t%d\n", POWER); \ + } \ + while (0) + +/* Horrible hack: We want to prevent some libgcc routines being included + for some multilibs. */ +#ifndef __ARM_ARCH_6M__ +#undef L_fixdfsi +#undef L_fixunsdfsi +#undef L_truncdfsf2 +#undef L_fixsfsi +#undef L_fixunssfsi +#undef L_floatdidf +#undef L_floatdisf +#undef L_floatundidf +#undef L_floatundisf +#endif + diff --git a/gcc-4.9/gcc/config/arm/fa526.md b/gcc-4.9/gcc/config/arm/fa526.md new file mode 100644 index 000000000..c345fdf65 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa526.md @@ -0,0 +1,173 @@ +;; Faraday FA526 Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. +;; +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 3 (2 cycle penalty). 
+;; ALU -> any use: latency = 2 (1 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA526 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa526_core" "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "526_alu_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "fa526_core") + +(define_insn_reservation "526_alu_shift_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fa526_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "526_mult1" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy")) + "fa526_core") + +(define_insn_reservation "526_mult2" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + umlals,smulls,smlals,smlawx")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
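+;; A single load therefore gets a latency of 3 in 526_load1_op below, +;; matching the 2-cycle load-use penalty noted at the top of this file; a +;; dcache miss would add further latency that is not modelled.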
+ +(define_insn_reservation "526_load1_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load1,load_byte")) + "fa526_core") + +(define_insn_reservation "526_load2_op" 4 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load2")) + "fa526_core*2") + +(define_insn_reservation "526_load3_op" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load3")) + "fa526_core*3") + +(define_insn_reservation "526_load4_op" 6 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load4")) + "fa526_core*4") + +(define_insn_reservation "526_store1_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store1")) + "fa526_core") + +(define_insn_reservation "526_store2_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store2")) + "fa526_core*2") + +(define_insn_reservation "526_store3_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store3")) + "fa526_core*3") + +(define_insn_reservation "526_store4_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store4")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA526 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "526_branch_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "branch")) + "fa526_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "526_call_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "call")) + "fa526_core") + diff --git a/gcc-4.9/gcc/config/arm/fa606te.md b/gcc-4.9/gcc/config/arm/fa606te.md new file mode 100644 index 000000000..01ecfc88c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa606te.md @@ -0,0 +1,182 @@ +;; Faraday FA606TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 2 (1 cycle penalty). +;; ALU -> any use: latency = 1 (0 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA606TE core. 
+;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; E M W + +(define_cpu_unit "fa606te_core" "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "606te_alu_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "fa606te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "606te_mult1" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "smlalxy")) + "fa606te_core") + +(define_insn_reservation "606te_mult2" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy")) + "fa606te_core*2") + +(define_insn_reservation "606te_mult3" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "mul,mla,muls,mlas")) + "fa606te_core*3") + +(define_insn_reservation "606te_mult4" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + "fa606te_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
+ +(define_insn_reservation "606te_load1_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load1,load_byte")) + "fa606te_core") + +(define_insn_reservation "606te_load2_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load2")) + "fa606te_core*2") + +(define_insn_reservation "606te_load3_op" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load3")) + "fa606te_core*3") + +(define_insn_reservation "606te_load4_op" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load4")) + "fa606te_core*4") + +(define_insn_reservation "606te_store1_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store1")) + "fa606te_core") + +(define_insn_reservation "606te_store2_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store2")) + "fa606te_core*2") + +(define_insn_reservation "606te_store3_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store3")) + "fa606te_core*3") + +(define_insn_reservation "606te_store4_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store4")) + "fa606te_core*4") + + +;;(define_insn_reservation "606te_ldm_op" 9 +;; (and (eq_attr "tune" "fa606te") +;; (eq_attr "type" "load2,load3,load4,store2,store3,store4")) +;; "fa606te_core*7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA606TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "606te_branch_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "branch")) + "fa606te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "606te_call_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "call")) + "fa606te_core") + diff --git a/gcc-4.9/gcc/config/arm/fa626te.md b/gcc-4.9/gcc/config/arm/fa626te.md new file mode 100644 index 000000000..e615bae37 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa626te.md @@ -0,0 +1,177 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. 
+ +;; Modeled pipeline characteristics: +;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles). +;; ALU -> shifted address LDR/STR: latency = 3. +;; ( extra 1 cycle unavoidable stall). +;; ALU -> other use: latency = 2 (available after 2 cycles). +;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles). +;; LD -> shifted address LDR/STR: latency = 4 +;; ( extra 1 cycle unavoidable stall). +;; LD -> any other use: latency = 3 (available after 3 cycles). + +;; This automaton provides a pipeline description for the Faraday +;; FA626TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa626te_core" "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "626te_alu_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "fa626te_core") + +(define_insn_reservation "626te_alu_shift_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fa626te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "626te_mult1" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) + "fa626te_core") + +(define_insn_reservation "626te_mult2" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "mul,mla")) + "fa626te_core") + +(define_insn_reservation "626te_mult3" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fa626te_core*2") + +(define_insn_reservation "626te_mult4" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "smulls,smlals,umulls,umlals")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). 
These models assume that all memory references +;; hit in dcache. + +(define_insn_reservation "626te_load1_op" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load1,load_byte")) + "fa626te_core") + +(define_insn_reservation "626te_load2_op" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load2,load3")) + "fa626te_core*2") + +(define_insn_reservation "626te_load3_op" 5 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load4")) + "fa626te_core*3") + +(define_insn_reservation "626te_store1_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store1")) + "fa626te_core") + +(define_insn_reservation "626te_store2_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store2,store3")) + "fa626te_core*2") + +(define_insn_reservation "626te_store3_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store4")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA626TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "626te_branch_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "branch")) + "fa626te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "626te_call_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "call")) + "fa626te_core") + diff --git a/gcc-4.9/gcc/config/arm/fa726te.md b/gcc-4.9/gcc/config/arm/fa726te.md new file mode 100644 index 000000000..225b2cfdd --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa726te.md @@ -0,0 +1,223 @@ +;; Faraday FA726TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; This automaton provides a pipeline description for the Faraday +;; FA726TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. 
+ +(define_automaton "fa726te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; E1 E2 E3 E4 E5 WB +;;______________________________________________________ +;; +;; <-------------- LD/ST -----------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 0) +;;______________________________________________________ +;; +;; <---------- MUL ---------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 1) + + +(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te") +(define_cpu_unit "fa726te_mac_pipe" "fa726te") +(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te") + +;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly +;; improve code quality. +(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te") +(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te") + +(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)") +;; Reservation to restrict issue to 1. +(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; Move instructions. +(define_insn_reservation "726te_shift_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with no shifted operand will finished in 1 cycle +;; Other ALU instructions 2 cycles. +(define_insn_reservation "726te_alu_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mrs,multiple,no_insn")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with a shift-by-register operand. +;; These really stall in the decoder, in order to read the shift value +;; in the first cycle. If the instruction uses both shifter and AU, +;; it takes 3 cycles. +(define_insn_reservation "726te_alu_shift_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "extend,alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +(define_insn_reservation "726te_alu_shift_reg_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. 
Multiply operations occur in both the execute and memory
+;; stages of the pipeline.
+
+(define_insn_reservation "726te_mult_op" 3
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
+                        umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
+  "fa726te_issue+fa726te_mac_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with a base register writeback
+;; (such as "ldm!").  These models assume that all memory references
+;; hit in dcache.
+
+;; Loads with a shifted offset take 3 cycles, and are (a) probably the
+;; most common and (b) the pessimistic assumption will lead to fewer stalls.
+
+;; Scalar loads are pipelined in the FA726TE LSU pipe.
+;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
+;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
+;; same "bundle", the 2nd load will introduce another ISSUE stall but is
+;; still OK to execute (and may be beneficial sometimes).
+
+(define_insn_reservation "726te_load1_op" 3
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "load1,load_byte"))
+  "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
+   | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
+
+(define_insn_reservation "726te_store1_op" 1
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "store1"))
+  "fa726te_blockage*2")
+
+;; Load/Store Multiple blocks all pipelines in EX stages until WB.
+;; No other instructions can be issued together.  Since they essentially
+;; prevent all scheduling opportunities, we model them together here.
+
+;; The LDM is broken into multiple load instructions; a later instruction in
+;; pipe 1 is stalled.
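+
+;; (Editorial note, inferred from the reservation strings in this file rather
+;; than from the design note: "fa726te_blockage" was defined above as
+;; (fa726te_is0+fa726te_is1), i.e. it claims both issue slots in the same
+;; cycle.  A reservation string such as
+;;
+;;   "fa726te_blockage*4"
+;;
+;; therefore keeps the whole two-wide issue stage busy for four consecutive
+;; cycles, which is how the "no other instructions can be issued together"
+;; behaviour of LDM/STM is modeled below.)
+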
+(define_insn_reservation "726te_ldm2_op" 4 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load2,load3")) + "fa726te_blockage*4") + +(define_insn_reservation "726te_ldm3_op" 5 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load4")) + "fa726te_blockage*5") + +(define_insn_reservation "726te_stm2_op" 2 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store2,store3")) + "fa726te_blockage*3") + +(define_insn_reservation "726te_stm3_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store4")) + "fa726te_blockage*4") + +(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\ + 726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\ + 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op" + "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_op" "arm_no_early_alu_shift_dep") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op") + +(define_bypass 4 "726te_load1_op" "726te_mult_op") +(define_bypass 5 "726te_ldm2_op" "726te_mult_op") +(define_bypass 6 "726te_ldm3_op" "726te_mult_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA726TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "726te_branch_op" 0 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "branch")) + "fa726te_blockage") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 is ready for int return value. +(define_insn_reservation "726te_call_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "call")) + "fa726te_blockage") + diff --git a/gcc-4.9/gcc/config/arm/fmp626.md b/gcc-4.9/gcc/config/arm/fmp626.md new file mode 100644 index 000000000..439054da6 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fmp626.md @@ -0,0 +1,191 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
*/ + +;; These descriptions are based on the information contained in the +;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Pipeline architecture +;; S E M W(Q1) Q2 +;; ___________________________________________ +;; shifter alu +;; mul1 mul2 mul3 +;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5 + +;; This automaton provides a pipeline description for the Faraday +;; FMP626 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +(define_cpu_unit "fmp626_core" "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "mp626_alu_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) + "fmp626_core") + +(define_insn_reservation "mp626_alu_shift_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fmp626_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "mp626_mult1" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) + "fmp626_core") + +(define_insn_reservation "mp626_mult2" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "mul,mla")) + "fmp626_core") + +(define_insn_reservation "mp626_mult3" 3 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fmp626_core*2") + +(define_insn_reservation "mp626_mult4" 4 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "smulls,smlals,umulls,umlals")) + "fmp626_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
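+
+;; (Editorial note, not from the design note: the latencies in the
+;; reservations below are defaults; the define_bypass forms later in this
+;; file override them for specific producer/consumer pairs.  For example the
+;; existing bypass
+;;
+;;   (define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
+;;     "mp626_store1_op,mp626_store2_op,mp626_store3_op"
+;;     "arm_no_early_store_addr_dep")
+;;
+;; lets a load feed the *data* operand of a store after only one cycle; the
+;; guard function is assumed to reject the case where the store needs the
+;; loaded value to form its address.)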
+ +(define_insn_reservation "mp626_load1_op" 5 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load1,load_byte")) + "fmp626_core") + +(define_insn_reservation "mp626_load2_op" 6 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load2,load3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_load3_op" 7 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load4")) + "fmp626_core*3") + +(define_insn_reservation "mp626_store1_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store1")) + "fmp626_core") + +(define_insn_reservation "mp626_store2_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store2,store3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_store3_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store4")) + "fmp626_core*3") + +(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op" + "mp626_store1_op,mp626_store2_op,mp626_store3_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\ + mp626_mult3,mp626_mult4" "mp626_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op") +(define_bypass 2 "mp626_mult3" "mp626_alu_op") +(define_bypass 3 "mp626_mult4" "mp626_alu_op") +(define_bypass 4 "mp626_load1_op" "mp626_alu_op") +(define_bypass 5 "mp626_load2_op" "mp626_alu_op") +(define_bypass 6 "mp626_load3_op" "mp626_alu_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FMP626 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "mp626_branch_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "branch")) + "fmp626_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "mp626_call_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "call")) + "fmp626_core") + diff --git a/gcc-4.9/gcc/config/arm/genopt.sh b/gcc-4.9/gcc/config/arm/genopt.sh new file mode 100755 index 000000000..68fdb564c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/genopt.sh @@ -0,0 +1,95 @@ +#!/bin/sh +# Generate arm-tables.opt from the lists in *.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(processor_type) Type(enum processor_type) +Known ARM CPUs (for use with the -mcpu= and -mtune= options): + +EOF + +awk -F'[(, ]+' '/^ARM_CORE/ { + name = $2 + enum = $3 + gsub("\"", "", name) + print "EnumValue" + print "Enum(processor_type) String(" name ") Value(" enum ")" + print "" +}' $1/arm-cores.def + +cat <. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically by gentune.sh from arm-cores.def" + +allcores=`awk -F'[(, ]+' '/^ARM_CORE/ { cores = cores$3"," } END { print cores } ' $1` + +echo "(define_attr \"tune\"" +echo " \"$allcores\"" | sed -e 's/,"$/"/' | sed -e 's/\([a-z0-9_]\+,[a-z0-9_]\+,[a-z0-9_]\+,\)/\1\n\t/g' +echo " (const (symbol_ref \"((enum attr_tune) arm_tune)\")))" diff --git a/gcc-4.9/gcc/config/arm/iterators.md b/gcc-4.9/gcc/config/arm/iterators.md new file mode 100644 index 000000000..33e09e4ce --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iterators.md @@ -0,0 +1,585 @@ +;; Code and mode itertator and attribute definitions for the ARM backend +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;;---------------------------------------------------------------------------- +;; Mode iterators +;;---------------------------------------------------------------------------- + +;; A list of modes that are exactly 64 bits in size. This is used to expand +;; some splits that are the same for all modes when operating on ARM +;; registers. +(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF]) + +(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF]) + +;; A list of integer modes that are up to one word long +(define_mode_iterator QHSI [QI HI SI]) + +;; A list of integer modes that are less than a word +(define_mode_iterator NARROW [QI HI]) + +;; A list of all the integer modes up to 64bit +(define_mode_iterator QHSD [QI HI SI DI]) + +;; A list of the 32bit and 64bit integer modes +(define_mode_iterator SIDI [SI DI]) + +;; A list of modes which the VFP unit can handle +(define_mode_iterator SDF [(SF "TARGET_VFP") (DF "TARGET_VFP_DOUBLE")]) + +;; Integer element sizes implemented by IWMMXT. +(define_mode_iterator VMMX [V2SI V4HI V8QI]) + +(define_mode_iterator VMMX2 [V4HI V2SI]) + +;; Integer element sizes for shifts. +(define_mode_iterator VSHFT [V4HI V2SI DI]) + +;; Integer and float modes supported by Neon and IWMMXT. +(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer and float modes supported by Neon and IWMMXT, except V2DI. 
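+;; (Editorial sketch, not part of the original header: a mode iterator such
+;; as VALLW, defined just below, is expanded by the machine-description
+;; reader into one copy of each pattern that uses it, one per listed mode.
+;; A hypothetical pattern written as
+;;
+;;   (define_insn "example_neg<mode>2"
+;;     [(set (match_operand:VALLW 0 "s_register_operand" "=w")
+;;           (neg:VALLW (match_operand:VALLW 1 "s_register_operand" "w")))]
+;;     "TARGET_NEON"
+;;     "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+;;
+;; would generate one insn per mode, with <mode>, <V_s_elem> and <V_reg>
+;; substituted from the mode attributes defined further down in this file.)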
+(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer modes supported by Neon and IWMMXT +(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Integer modes supported by Neon and IWMMXT, except V2DI +(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Double-width vector modes. +(define_mode_iterator VD [V8QI V4HI V2SI V2SF]) + +;; Double-width vector modes plus 64-bit elements. +(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI]) + +;; Double-width vector modes without floating-point elements. +(define_mode_iterator VDI [V8QI V4HI V2SI]) + +;; Quad-width vector modes. +(define_mode_iterator VQ [V16QI V8HI V4SI V4SF]) + +;; Quad-width vector modes plus 64-bit elements. +(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI]) + +;; Quad-width vector modes without floating-point elements. +(define_mode_iterator VQI [V16QI V8HI V4SI]) + +;; Quad-width vector modes, with TImode added, for moves. +(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI]) + +;; Opaque structure types wider than TImode. +(define_mode_iterator VSTRUCT [EI OI CI XI]) + +;; Opaque structure types used in table lookups (except vtbl1/vtbx1). +(define_mode_iterator VTAB [TI EI OI]) + +;; Widenable modes. +(define_mode_iterator VW [V8QI V4HI V2SI]) + +;; Narrowable modes. +(define_mode_iterator VN [V8HI V4SI V2DI]) + +;; All supported vector modes (except singleton DImode). +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI]) + +;; All supported vector modes (except those with 64-bit integer elements). +(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) + +;; Supported integer vector modes (not 64 bit elements). +(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Supported integer vector modes (not singleton DI) +(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; Vector modes, including 64-bit integer elements. +(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI]) + +;; Vector modes including 64-bit integer elements, but no floats. +(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) + +;; Vector modes for float->int conversions. +(define_mode_iterator VCVTF [V2SF V4SF]) + +;; Vector modes form int->float conversions. +(define_mode_iterator VCVTI [V2SI V4SI]) + +;; Vector modes for doubleword multiply-accumulate, etc. insns. +(define_mode_iterator VMD [V4HI V2SI V2SF]) + +;; Vector modes for quadword multiply-accumulate, etc. insns. +(define_mode_iterator VMQ [V8HI V4SI V4SF]) + +;; Above modes combined. +(define_mode_iterator VMDQ [V4HI V2SI V2SF V8HI V4SI V4SF]) + +;; As VMD, but integer modes only. +(define_mode_iterator VMDI [V4HI V2SI]) + +;; As VMQ, but integer modes only. +(define_mode_iterator VMQI [V8HI V4SI]) + +;; Above modes combined. +(define_mode_iterator VMDQI [V4HI V2SI V8HI V4SI]) + +;; Modes with 8-bit and 16-bit elements. +(define_mode_iterator VX [V8QI V4HI V16QI V8HI]) + +;; Modes with 8-bit elements. +(define_mode_iterator VE [V8QI V16QI]) + +;; Modes with 64-bit elements only. +(define_mode_iterator V64 [DI V2DI]) + +;; Modes with 32-bit elements only. +(define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) + +;; Modes with 8-bit, 16-bit and 32-bit elements. +(define_mode_iterator VU [V16QI V8HI V4SI]) + +;; Iterators used for fixed-point support. 
+(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA]) + +(define_mode_iterator ADDSUB [V4QQ V2HQ V2HA]) + +(define_mode_iterator UQADDSUB [V4UQQ V2UHQ UQQ UHQ V2UHA UHA]) + +(define_mode_iterator QADDSUB [V4QQ V2HQ QQ HQ V2HA HA SQ SA]) + +(define_mode_iterator QMUL [HQ HA]) + +;;---------------------------------------------------------------------------- +;; Code iterators +;;---------------------------------------------------------------------------- + +;; A list of condition codes used in compare instructions where +;; the carry flag from the addition is used instead of doing the +;; compare a second time. +(define_code_iterator LTUGEU [ltu geu]) + +;; A list of ... +(define_code_iterator ior_xor [ior xor]) + +;; Operations on two halves of a quadword vector. +(define_code_iterator vqh_ops [plus smin smax umin umax]) + +;; Operations on two halves of a quadword vector, +;; without unsigned variants (for use with *SFmode pattern). +(define_code_iterator vqhs_ops [plus smin smax]) + +;; A list of widening operators +(define_code_iterator SE [sign_extend zero_extend]) + +;; Right shifts +(define_code_iterator rshifts [ashiftrt lshiftrt]) + +;;---------------------------------------------------------------------------- +;; Int iterators +;;---------------------------------------------------------------------------- + +(define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM + UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) + +(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM + UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) + +(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W + UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) + +(define_int_iterator CRYPTO_UNARY [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_BINARY [UNSPEC_AESD UNSPEC_AESE + UNSPEC_SHA1SU1 UNSPEC_SHA256SU0]) + +(define_int_iterator CRYPTO_TERNARY [UNSPEC_SHA1SU0 UNSPEC_SHA256H + UNSPEC_SHA256H2 UNSPEC_SHA256SU1]) + +(define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M + UNSPEC_SHA1P]) + +;;---------------------------------------------------------------------------- +;; Mode attributes +;;---------------------------------------------------------------------------- + +;; Determine element size suffix from vector mode. +(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")]) + +;; vtbl suffix for NEON vector modes. +(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")]) + +;; (Opposite) mode to convert to/from for NEON mode conversions. +(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI") + (V4SI "V4SF") (V4SF "V4SI")]) + +;; As above but in lower case. +(define_mode_attr V_cvtto [(V2SI "v2sf") (V2SF "v2si") + (V4SI "v4sf") (V4SF "v4si")]) + +;; Define element mode for each vector mode. +(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Element modes for vector extraction, padded up to register size. + +(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI") + (V4HI "SI") (V8HI "SI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Mode of pair of elements for each vector mode, to define transfer +;; size for structure lane/dup loads and stores. +(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (V2SF "V2SF") (V4SF "V2SF") + (DI "V2DI") (V2DI "V2DI")]) + +;; Similar, for three elements. 
+(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (V2SF "BLK") (V4SF "BLK") + (DI "EI") (V2DI "EI")]) + +;; Similar, for four elements. +(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") + (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "OI") (V2DI "OI")]) + +;; Register width from element mode +(define_mode_attr V_reg [(V8QI "P") (V16QI "q") + (V4HI "P") (V8HI "q") + (V2SI "P") (V4SI "q") + (V2SF "P") (V4SF "q") + (DI "P") (V2DI "q") + (SF "") (DF "P")]) + +;; Wider modes with the same number of elements. +(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) + +;; Narrower modes with the same number of elements. +(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")]) + +;; Narrower modes with double the number of elements. +(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI") + (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")]) + +;; Modes with half the number of equal-sized elements. +(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") + (V2DI "DI")]) + +;; Same, but lower-case. +(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") + (V4SI "v2si") (V4SF "v2sf") + (V2DI "di")]) + +;; Modes with twice the number of equal-sized elements. +(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") + (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") + (DI "V2DI")]) + +;; Same, but lower-case. +(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") + (V2SI "v4si") (V2SF "v4sf") + (DI "v2di")]) + +;; Modes with double-width elements. +(define_mode_attr V_double_width [(V8QI "V4HI") (V16QI "V8HI") + (V4HI "V2SI") (V8HI "V4SI") + (V2SI "DI") (V4SI "V2DI")]) + +;; Double-sized modes with the same element size. +;; Used for neon_vdup_lane, where the second operand is double-sized +;; even when the first one is quad. +(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") + (V8QI "V8QI") (V4HI "V4HI") + (V2SI "V2SI") (V2SF "V2SF")]) + +;; Mode of result of comparison operations (and bit-select operand 1). +(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") + (V4HI "V4HI") (V8HI "V8HI") + (V2SI "V2SI") (V4SI "V4SI") + (V2SF "V2SI") (V4SF "V4SI") + (DI "DI") (V2DI "V2DI")]) + +(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") + (V4HI "v4hi") (V8HI "v8hi") + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V2SF "v2si") (V4SF "v4si")]) + +;; Get element type from double-width mode, for operations where we +;; don't care about signedness. +(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") + (V4HI "i16") (V8HI "i16") + (V2SI "i32") (V4SI "i32") + (DI "i64") (V2DI "i64") + (V2SF "f32") (V4SF "f32") + (SF "f32") (DF "f64")]) + +;; Same, but for operations which work on signed values. +(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") + (V4HI "s16") (V8HI "s16") + (V2SI "s32") (V4SI "s32") + (DI "s64") (V2DI "s64") + (V2SF "f32") (V4SF "f32")]) + +;; Same, but for operations which work on unsigned values. +(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "u32") (V4SI "u32") + (DI "u64") (V2DI "u64") + (V2SF "f32") (V4SF "f32")]) + +;; Element types for extraction of unsigned scalars. 
+(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "32") (V4SI "32") + (V2SF "32") (V4SF "32")]) + +(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") + (V4HI "16") (V8HI "16") + (V2SI "32") (V4SI "32") + (DI "64") (V2DI "64") + (V2SF "32") (V4SF "32")]) + +(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") + (V4HI "h") (V8HI "h") + (V2SI "s") (V4SI "s") + (DI "d") (V2DI "d") + (V2SF "s") (V4SF "s")]) + +;; Element sizes for duplicating ARM registers to all elements of a vector. +(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")]) + +;; Opaque integer types for results of pair-forming intrinsics (vtrn, etc.) +(define_mode_attr V_PAIR [(V8QI "TI") (V16QI "OI") + (V4HI "TI") (V8HI "OI") + (V2SI "TI") (V4SI "OI") + (V2SF "TI") (V4SF "OI") + (DI "TI") (V2DI "OI")]) + +;; Same, but lower-case. +(define_mode_attr V_pair [(V8QI "ti") (V16QI "oi") + (V4HI "ti") (V8HI "oi") + (V2SI "ti") (V4SI "oi") + (V2SF "ti") (V4SF "oi") + (DI "ti") (V2DI "oi")]) + +;; Extra suffix on some 64-bit insn names (to avoid collision with standard +;; names which we don't want to define). +(define_mode_attr V_suf64 [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2SF "") (V4SF "") + (DI "_neon") (V2DI "")]) + + +;; Scalars to be presented to scalar multiplication instructions +;; must satisfy the following constraints. +;; 1. If the mode specifies 16-bit elements, the scalar must be in D0-D7. +;; 2. If the mode specifies 32-bit elements, the scalar must be in D0-D15. + +;; This mode attribute is used to obtain the correct register constraints. + +(define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") + (V8HI "x") (V4SI "t") (V4SF "t")]) + +;; Predicates used for setting type for neon instructions + +(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") + (V4HI "false") (V8HI "false") + (V2SI "false") (V4SI "false") + (V2SF "true") (V4SF "true") + (DI "false") (V2DI "false")]) + +(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true") + (V4HI "true") (V8HI "true") + (V2SI "false") (V4SI "false") + (V2SF "false") (V4SF "false") + (DI "false") (V2DI "false")]) + + +(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false") + (V4HI "true") (V8HI "false") + (V2SI "true") (V4SI "false") + (V2SF "true") (V4SF "false") + (DI "true") (V2DI "false")]) + +(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") + (V2SF "2") (V4SF "4") + (DI "1") (V2DI "2") + (DF "1") (V2DF "2")]) + +;; Same as V_widen, but lower-case. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + +;; Conditions to be used in extenddi patterns. +(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) +(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") + (QI "&& arm_arch6")]) +(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") + (QI "nonimmediate_operand")]) +(define_mode_attr qhs_extenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") + (QI "arm_reg_or_extendqisi_mem_op")]) +(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r,r") (HI "r,0,rm,rm,r") (QI "r,0,rUq,rm,r")]) +(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,r") (QI "r,0,rm,r")]) + +;; Mode attributes used for fixed-point support. 
+(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16") + (V2UHA "16") (UHA "16") + (V4QQ "8") (V2HQ "16") (QQ "8") (HQ "16") + (V2HA "16") (HA "16") (SQ "") (SA "")]) + +;; Mode attribute for vshll. +(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) + +;; Mode attributes used for VFP support. +(define_mode_attr F_constraint [(SF "t") (DF "w")]) +(define_mode_attr vfp_type [(SF "s") (DF "d")]) +(define_mode_attr vfp_double_cond [(SF "") (DF "&& TARGET_VFP_DOUBLE")]) + +;; Mode attribute used to build the "type" attribute. +(define_mode_attr q [(V8QI "") (V16QI "_q") + (V4HI "") (V8HI "_q") + (V2SI "") (V4SI "_q") + (V2SF "") (V4SF "_q") + (DI "") (V2DI "_q") + (DF "") (V2DF "_q")]) + +;;---------------------------------------------------------------------------- +;; Code attributes +;;---------------------------------------------------------------------------- + +;; Assembler mnemonics for vqh_ops and vqhs_ops iterators. +(define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax") + (umin "vmin") (umax "vmax")]) + +;; Type attributes for vqh_ops and vqhs_ops iterators. +(define_code_attr VQH_type [(plus "add") (smin "minmax") (smax "minmax") + (umin "minmax") (umax "minmax")]) + +;; Signs of above, where relevant. +(define_code_attr VQH_sign [(plus "i") (smin "s") (smax "s") (umin "u") + (umax "u")]) + +(define_code_attr cnb [(ltu "CC_C") (geu "CC")]) +(define_code_attr optab [(ltu "ltu") (geu "geu")]) + +;; Assembler mnemonics for signedness of widening operations. +(define_code_attr US [(sign_extend "s") (zero_extend "u")]) + +;; Right shifts +(define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) +(define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) + +;;---------------------------------------------------------------------------- +;; Int attributes +;;---------------------------------------------------------------------------- + +;; Standard names for floating point to integral rounding instructions. +(define_int_attr vrint_pattern [(UNSPEC_VRINTZ "btrunc") (UNSPEC_VRINTP "ceil") + (UNSPEC_VRINTA "round") (UNSPEC_VRINTM "floor") + (UNSPEC_VRINTR "nearbyint") (UNSPEC_VRINTX "rint")]) + +;; Suffixes for vrint instructions specifying rounding modes. +(define_int_attr vrint_variant [(UNSPEC_VRINTZ "z") (UNSPEC_VRINTP "p") + (UNSPEC_VRINTA "a") (UNSPEC_VRINTM "m") + (UNSPEC_VRINTR "r") (UNSPEC_VRINTX "x")]) + +;; Some of the vrint instuctions are predicable. 
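+;; (Editorial illustration, not part of the original file: int iterators work
+;; like mode iterators but range over unspec codes, so a hypothetical pattern
+;; such as
+;;
+;;   (define_insn "example_<vrint_pattern>sf2"
+;;     [(set (match_operand:SF 0 "s_register_operand" "=t")
+;;           (unspec:SF [(match_operand:SF 1 "s_register_operand" "t")]
+;;                      VRINT))]
+;;     "TARGET_HARD_FLOAT"  ; target condition shown only for illustration
+;;     "vrint<vrint_variant>.f32\t%0, %1"
+;;     [(set_attr "predicable" "<vrint_predicable>")])
+;;
+;; expands to one insn per unspec listed in VRINT, with the int attributes in
+;; this section -- including the one defined just below -- selecting the
+;; mnemonic suffix and predicability of each variant.)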
+(define_int_attr vrint_predicable [(UNSPEC_VRINTZ "yes") (UNSPEC_VRINTP "no") + (UNSPEC_VRINTA "no") (UNSPEC_VRINTM "no") + (UNSPEC_VRINTR "yes") (UNSPEC_VRINTX "yes")]) + +(define_int_attr vrint_conds [(UNSPEC_VRINTZ "nocond") (UNSPEC_VRINTP "unconditional") + (UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional") + (UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")]) + +(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") + (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") + (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) + +(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") + (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32CB "crc32cb") + (UNSPEC_CRC32CH "crc32ch") (UNSPEC_CRC32CW "crc32cw")]) + +(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") + (UNSPEC_CRC32W "SI") (UNSPEC_CRC32CB "QI") + (UNSPEC_CRC32CH "HI") (UNSPEC_CRC32CW "SI")]) + +(define_int_attr crypto_pattern [(UNSPEC_SHA1H "sha1h") (UNSPEC_AESMC "aesmc") + (UNSPEC_AESIMC "aesimc") (UNSPEC_AESD "aesd") + (UNSPEC_AESE "aese") (UNSPEC_SHA1SU1 "sha1su1") + (UNSPEC_SHA256SU0 "sha256su0") (UNSPEC_SHA1C "sha1c") + (UNSPEC_SHA1M "sha1m") (UNSPEC_SHA1P "sha1p") + (UNSPEC_SHA1SU0 "sha1su0") (UNSPEC_SHA256H "sha256h") + (UNSPEC_SHA256H2 "sha256h2") + (UNSPEC_SHA256SU1 "sha256su1")]) + +(define_int_attr crypto_type + [(UNSPEC_AESE "crypto_aes") (UNSPEC_AESD "crypto_aes") + (UNSPEC_AESMC "crypto_aes") (UNSPEC_AESIMC "crypto_aes") + (UNSPEC_SHA1C "crypto_sha1_slow") (UNSPEC_SHA1P "crypto_sha1_slow") + (UNSPEC_SHA1M "crypto_sha1_slow") (UNSPEC_SHA1SU1 "crypto_sha1_fast") + (UNSPEC_SHA1SU0 "crypto_sha1_xor") (UNSPEC_SHA256H "crypto_sha256_slow") + (UNSPEC_SHA256H2 "crypto_sha256_slow") (UNSPEC_SHA256SU0 "crypto_sha256_fast") + (UNSPEC_SHA256SU1 "crypto_sha256_slow")]) + +(define_int_attr crypto_size_sfx [(UNSPEC_SHA1H "32") (UNSPEC_AESMC "8") + (UNSPEC_AESIMC "8") (UNSPEC_AESD "8") + (UNSPEC_AESE "8") (UNSPEC_SHA1SU1 "32") + (UNSPEC_SHA256SU0 "32") (UNSPEC_SHA1C "32") + (UNSPEC_SHA1M "32") (UNSPEC_SHA1P "32") + (UNSPEC_SHA1SU0 "32") (UNSPEC_SHA256H "32") + (UNSPEC_SHA256H2 "32") (UNSPEC_SHA256SU1 "32")]) + +(define_int_attr crypto_mode [(UNSPEC_SHA1H "V4SI") (UNSPEC_AESMC "V16QI") + (UNSPEC_AESIMC "V16QI") (UNSPEC_AESD "V16QI") + (UNSPEC_AESE "V16QI") (UNSPEC_SHA1SU1 "V4SI") + (UNSPEC_SHA256SU0 "V4SI") (UNSPEC_SHA1C "V4SI") + (UNSPEC_SHA1M "V4SI") (UNSPEC_SHA1P "V4SI") + (UNSPEC_SHA1SU0 "V4SI") (UNSPEC_SHA256H "V4SI") + (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + +;; Both kinds of return insn. +(define_code_iterator returns [return simple_return]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) +(define_code_attr return_simple_p [(return "false") (simple_return "true")]) +(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") + (simple_return " && use_simple_return_p ()")]) +(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") + (simple_return " && use_simple_return_p ()")]) diff --git a/gcc-4.9/gcc/config/arm/iwmmxt.md b/gcc-4.9/gcc/config/arm/iwmmxt.md new file mode 100644 index 000000000..56ff3e9f3 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iwmmxt.md @@ -0,0 +1,1775 @@ +;; Patterns for the Intel Wireless MMX technology architecture. +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Contributed by Red Hat. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register numbers. Need to sync with FIRST_IWMMXT_GR_REGNUM in arm.h +(define_constants + [(WCGR0 96) + (WCGR1 97) + (WCGR2 98) + (WCGR3 99) + ] +) + +(define_insn "tbcstv8qi" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_duplicate:V8QI (match_operand:QI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "tbcstv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_duplicate:V4HI (match_operand:HI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcsth%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "tbcstv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_duplicate:V2SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "iwmmxt_iordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (ior:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wor,*,*")] +) + +(define_insn "iwmmxt_xordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (xor:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wxor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wxor,*,*")] +) + +(define_insn "iwmmxt_anddi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (and:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wand%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wand,*,*")] +) + +(define_insn "iwmmxt_nanddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (and:DI (match_operand:DI 1 "register_operand" "y") + (not:DI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wandn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wandn")] +) + +(define_insn "*iwmmxt_arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + case 3: case 4: + return output_move_double (operands, true, NULL); + case 5: + return \"wmov%?\\t%0,%1\"; + case 6: + return 
\"tmcrr%?\\t%0,%Q1,%R1\"; + case 7: + return \"tmrrc%?\\t%Q0,%R0,%1\"; + case 8: + return \"wldrd%?\\t%0,%1\"; + case 9: + return \"wstrd%?\\t%1,%0\"; + case 10: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 11: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 12: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 13: case 14: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set (attr "length") (cond [(eq_attr "alternative" "0,3,4") (const_int 8) + (eq_attr "alternative" "1") (const_int 12) + (eq_attr "alternative" "2") (const_int 16) + (eq_attr "alternative" "12") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,f_mcrr,f_mrrc,\ + ffarithd,f_loadd,f_stored") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")] +) + +(define_insn "*iwmmxt_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk, m,z,r,?z,?Uy,*t, r,*t,*t ,*Uv") + (match_operand:SI 1 "general_operand" " rk,I,K,j,mi,rk,r,z,Uy, z, r,*t,*t,*Uvi, *t"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov\\t%0, %1\"; + case 1: return \"mov\\t%0, %1\"; + case 2: return \"mvn\\t%0, #%B1\"; + case 3: return \"movw\\t%0, %1\"; + case 4: return \"ldr\\t%0, %1\"; + case 5: return \"str\\t%1, %0\"; + case 6: return \"tmcr\\t%0, %1\"; + case 7: return \"tmrc\\t%0, %1\"; + case 8: return arm_output_load_gr (operands); + case 9: return \"wstrw\\t%1, %0\"; + case 10:return \"fmsr\\t%0, %1\"; + case 11:return \"fmrs\\t%0, %1\"; + case 12:return \"fcpys\\t%0, %1\\t%@ int\"; + case 13: case 14: + return output_move_vfp (operands); + default: + gcc_unreachable (); + }" + [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,f_mcr,f_mrc,\ + fmov,f_loads,f_stores") + (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*") + ;; Note - the "predicable" attribute is not allowed to have alternatives. + ;; Since the wSTRw wCx instruction is not predicable, we cannot support + ;; predicating any of the alternatives in this template. Instead, + ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn. + (set_attr "predicable" "no") + ;; Also - we have to pretend that these insns clobber the condition code + ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize + ;; them. + (set_attr "conds" "clob")] +) + +;; Because iwmmxt_movsi_insn is not predicable, we provide the +;; cond_exec version explicitly, with appropriate constraints. 
+ +(define_insn "*cond_iwmmxt_movsi_insn" + [(cond_exec + (match_operator 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") + (const_int 0)]) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r") + (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov%?\\t%0, %1\"; + case 1: return \"mvn%?\\t%0, #%B1\"; + case 2: return \"ldr%?\\t%0, %1\"; + case 3: return \"str%?\\t%1, %0\"; + case 4: return \"tmcr%?\\t%0, %1\"; + default: return \"tmrc%?\\t%0, %1\"; + }" + [(set_attr "type" "*,*,load1,store1,*,*") + (set_attr "pool_range" "*,*,4096, *,*,*") + (set_attr "neg_pool_range" "*,*,4084, *,*,*")] +) + +(define_insn "mov_internal" + [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m") + (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: return \"wmov%?\\t%0, %1\"; + case 1: return \"wstrd%?\\t%1, %0\"; + case 2: return \"wldrd%?\\t%0, %1\"; + case 3: return \"tmrrc%?\\t%Q0, %R0, %1\"; + case 4: return \"tmcrr%?\\t%0, %Q1, %R1\"; + case 5: return \"#\"; + default: return output_move_double (operands, true, NULL); + }" + [(set_attr "predicable" "yes") + (set_attr "length" "4, 4, 4,4,4,8, 8,8") + (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load1,store1") + (set_attr "pool_range" "*, *, 256,*,*,*, 256,*") + (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")] +) + +(define_expand "iwmmxt_setwcgr0" + [(set (reg:SI WCGR0) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr1" + [(set (reg:SI WCGR1) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr2" + [(set (reg:SI WCGR2) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr3" + [(set (reg:SI WCGR3) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr0" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR0))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr1" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR1))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr2" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR2))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr3" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR3))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_insn "*and3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (and:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wand\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wand")] +) + +(define_insn "*ior3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (ior:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wor\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wor")] +) + +(define_insn "*xor3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (xor:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + 
"TARGET_REALLY_IWMMXT" + "wxor\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + + +;; Vector add/subtract + +(define_insn "*add3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (plus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "*sub3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (minus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + 
"wsubwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "*mulv4hi3_iwmmxt" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmulul%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulsm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulum%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmacs" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))] + "TARGET_REALLY_IWMMXT" + "wmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacsz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))] + "TARGET_REALLY_IWMMXT" + "wmacsz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacu" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))] + "TARGET_REALLY_IWMMXT" + "wmacu%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacuz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))] + "TARGET_REALLY_IWMMXT" + "wmacuz%?\\t%0, %1, %2" + [(set_attr 
"predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +;; Same as xordi3, but don't show input operands so that we don't think +;; they are live. +(define_insn "iwmmxt_clrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(const_int 0)] UNSPEC_CLRDI))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +;; Seems like cse likes to generate these, so we have to support them. + +(define_insn "iwmmxt_clrv8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=y") + (const_vector:V8QI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +(define_insn "iwmmxt_clrv4hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=y") + (const_vector:V4HI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +(define_insn "iwmmxt_clrv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (const_vector:V2SI [(const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +;; Unsigned averages/sum of absolute differences + +(define_insn "iwmmxt_uavgrndv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (const_vector:V8HI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2br%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgrndv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_vector:V4SI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2hr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2b%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2h%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +;; Insert/extract/shuffle + +(define_insn "iwmmxt_tinsrb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_duplicate:V8QI + (truncate:QI (match_operand:SI 2 "nonimmediate_operand" 
"r"))) + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_tinsrh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_duplicate:V4HI + (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r"))) + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_tinsrw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_duplicate:V2SI + (match_operand:SI 2 "nonimmediate_operand" "r")) + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_textrmub" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmuh" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmsh" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +;; There are signed/unsigned variants of this instruction, but they are +;; pointless. 
+(define_insn "iwmmxt_textrmw" + [(set (match_operand:SI 0 "register_operand" "=r") + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_REALLY_IWMMXT" + "textrmsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_wshufh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))] + "TARGET_REALLY_IWMMXT" + "wshufh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wshufh")] +) + +;; Mask-generating comparisons +;; +;; Note - you cannot use patterns like these here: +;; +;; (set (match:) (: (match:) (match:))) +;; +;; Because GCC will assume that the truth value (1 or 0) is installed +;; into the entire destination vector, (with the '1' going into the least +;; significant element of the vector). This is not how these instructions +;; behave. + +(define_insn "eqv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "eqv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "eqv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI + [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "gtuv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtuv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtuv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtuw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + 
(unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +;; Max/min insns + +(define_insn "*smax<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxs<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmax")] +) + +(define_insn "*umax<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxu<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmax")] +) + +(define_insn "*smin<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmins<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmin")] +) + +(define_insn "*umin<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wminu<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmin")] +) + +;; Pack/unpack insns.
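+;;
+;; The wpack* patterns narrow two source vectors with signed (ss_truncate)
+;; or unsigned (us_truncate) saturation and concatenate the results.
+;; wunpckih*/wunpckil* interleave the high or low elements of two vectors,
+;; while wunpckeh*/wunpckel* widen the high or low half of a single vector
+;; with zero (u) or sign (s) extension.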
+ +(define_insn "iwmmxt_wpackhss" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackwss" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackdss" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (ss_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackhus" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackwus" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackdus" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (us_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wunpckihb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckihb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckihh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckihh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckihw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 
"register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckihw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckilb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckilb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckilh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckilh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckilw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckilw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckehub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehub%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehuh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehuh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehuw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehuw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + 
(sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckelub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelub%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckeluh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckeluh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckeluw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "wunpckeluw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +;; Shifts + +(define_insn "ror3" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wrorg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wror\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wror, wmmx_wror")] +) + +(define_insn "ashr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrag%?\\t%0, %1, %2\"; + case 1: 
+ return arm_output_iwmmxt_shift_immediate (\"wsra\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsra, wmmx_wsra")] +) + +(define_insn "lshr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrlg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsrl\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] +) + +(define_insn "ashl3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsllg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsll\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsll, wmmx_wsll")] +) + +(define_insn "ror3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wror%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wror\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wror, wmmx_wror")] +) + +(define_insn "ashr3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsra%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsra\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsra, wmmx_wsra")] +) + +(define_insn "lshr3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "register_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrl%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsrl\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] +) + +(define_insn "ashl3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsll%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsll\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsll, wmmx_wsll")] +) + +(define_insn "iwmmxt_wmadds" + [(set 
(match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_REALLY_IWMMXT" + "wmadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddu" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddu%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_tmia" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" "r")))))] + "TARGET_REALLY_IWMMXT" + "tmia%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmia")] +) + +(define_insn "iwmmxt_tmiaph" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r")))) + (mult:DI (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16)))) + (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))] + "TARGET_REALLY_IWMMXT" + "tmiaph%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaph")] +) + +(define_insn "iwmmxt_tmiabb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))] + "TARGET_REALLY_IWMMXT" + "tmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiatb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI + (match_operand:SI 3 "register_operand" "r"))))))] + "TARGET_REALLY_IWMMXT" + "tmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiabt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI + (ashiftrt:SI + 
(match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiatt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_tmovmskh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_tmovmskw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_waccb" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +(define_insn "iwmmxt_wacch" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "wacch%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +(define_insn "iwmmxt_waccw" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +;; use unspec here to prevent 8 * imm to be optimized by cse +(define_insn "iwmmxt_waligni" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(subreg:V8QI + (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (match_operand:SI 3 "immediate_operand" "i") + (const_int 8))) 0)] UNSPEC_WALIGNI))] + "TARGET_REALLY_IWMMXT" + "waligni%?\\t%0, %1, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waligni")] +) + +(define_insn "iwmmxt_walignr" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (match_operand:SI 3 "register_operand" "z") (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr%U3%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr0" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + 
(subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR0) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr0%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr1" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR1) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr1%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr2" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR2) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr2%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR3) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr3%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_wsadb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [ + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V8QI 2 "register_operand" "y") + (match_operand:V8QI 3 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [ + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadh%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadbz" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadbz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadhz" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadhz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(include "iwmmxt2.md") diff --git a/gcc-4.9/gcc/config/arm/iwmmxt2.md b/gcc-4.9/gcc/config/arm/iwmmxt2.md new file mode 100644 index 000000000..b6e4b2476 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iwmmxt2.md @@ -0,0 +1,903 @@ +;; Patterns for the Intel Wireless MMX technology architecture. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. 
+;; Written by Marvell, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_insn "iwmmxt_wabs3" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (unspec:VMMX [(match_operand:VMMX 1 "register_operand" "y")] UNSPEC_WABS))] + "TARGET_REALLY_IWMMXT" + "wabs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabs")] +) + +(define_insn "iwmmxt_wabsdiffb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (abs:V8HI + (minus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_wabsdiffh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate: V4HI + (abs:V4SI + (minus:V4SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_wabsdiffw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate: V2SI + (abs:V2DI + (minus:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_waddsubhx" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (ss_minus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (ss_plus:V4HI + (match_dup 1) + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (const_int 10)))] + "TARGET_REALLY_IWMMXT" + "waddsubhx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddsubhx")] +) + +(define_insn "iwmmxt_wsubaddhx" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (ss_minus:V4HI + (match_dup 1) + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (const_int 10)))] + "TARGET_REALLY_IWMMXT" + "wsubaddhx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsubaddhx")] +) + +(define_insn "addc3" + [(set (match_operand:VMMX2 0 "register_operand" "=y") + (unspec:VMMX2 + [(plus:VMMX2 + (match_operand:VMMX2 1 "register_operand" "y") + (match_operand:VMMX2 2 "register_operand" "y"))] 
UNSPEC_WADDC))] + "TARGET_REALLY_IWMMXT" + "waddc%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "iwmmxt_avg4" +[(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (vec_select:V8HI + (vec_merge:V8HI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 1)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 2)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) + (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) + (const_int 2)) + (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (const_int 254)) + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4) + (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wavg4%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg4")] +) + +(define_insn "iwmmxt_avg4r" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (vec_select:V8HI + (vec_merge:V8HI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 1)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 2)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (const_vector:V8HI [(const_int 2) (const_int 2) (const_int 2) (const_int 2) + (const_int 2) (const_int 2) (const_int 2) (const_int 2)])) + (const_int 2)) + (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (const_int 254)) + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4) + (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wavg4r%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg4")] +) + +(define_insn "iwmmxt_wmaddsx" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddsx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddux" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI 
(match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddux%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddsn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddsn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddun" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddun%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmulwsm" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (ashiftrt:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwsm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulwum" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (lshiftrt:V2DI + (mult:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwum%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulsmr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (ashiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_vector:V4SI [(const_int 32768) + (const_int 32768) + (const_int 32768)])) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulsmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulumr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 
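+;; Note on the rounding forms (wmulsmr, wmulumr, wmulwsmr, wmulwumr): as the
+;; RTL above and below shows, they add half of the final weight to the
+;; double-width product (0x8000 for the halfword forms, 0x80000000 for the
+;; word forms) before shifting right by 16 or 32, so the returned high half
+;; is rounded to nearest instead of truncated.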
"register_operand" "y"))) + (const_vector:V4SI [(const_int 32768) + (const_int 32768) + (const_int 32768) + (const_int 32768)])) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulumr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulwsmr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (ashiftrt:V2DI + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_vector:V2DI [(const_int 2147483648) + (const_int 2147483648)])) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwsmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulwumr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (lshiftrt:V2DI + (plus:V2DI + (mult:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_vector:V2DI [(const_int 2147483648) + (const_int 2147483648)])) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwumr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulwl" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (mult:V2SI + (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmulwl%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wqmulm" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULM))] + "TARGET_REALLY_IWMMXT" + "wqmulm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulm")] +) + +(define_insn "iwmmxt_wqmulwm" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWM))] + "TARGET_REALLY_IWMMXT" + "wqmulwm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulwm")] +) + +(define_insn "iwmmxt_wqmulmr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULMR))] + "TARGET_REALLY_IWMMXT" + "wqmulmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulm")] +) + +(define_insn "iwmmxt_wqmulwmr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWMR))] + "TARGET_REALLY_IWMMXT" + "wqmulwmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulwm")] +) + +(define_insn "iwmmxt_waddbhusm" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (us_truncate:V4QI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))))))] + "TARGET_REALLY_IWMMXT" + "waddbhusm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddbhus")] +) + +(define_insn "iwmmxt_waddbhusl" + 
[(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))) + (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "waddbhusl%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddbhus")] +) + +(define_insn "iwmmxt_wqmiabb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabt" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatt" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabbn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiabbn%?\\t%0, %2, 
%3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabtn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiabtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatbn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiatbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiattn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiattn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wmiabb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" 
"y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabtn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiattn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + 
(parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiattn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiawbb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtt" +[(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbtn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" 
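+;; The wmiaw* patterns do the same at 32-bit granularity: one signed 32-bit
+;; element of operand 2 times one of operand 3 is added to (or, for the "n"
+;; forms, subtracted from) the 64-bit accumulator in operand 1.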
"y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawttn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawttn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmerge" + [(set (match_operand:DI 0 "register_operand" "=y") + (ior:DI + (ashift:DI + (match_operand:DI 2 "register_operand" "y") + (minus:SI + (const_int 64) + (mult:SI + (match_operand:SI 3 "immediate_operand" "i") + (const_int 8)))) + (lshiftrt:DI + (ashift:DI + (match_operand:DI 1 "register_operand" "y") + (mult:SI + (match_dup 3) + (const_int 8))) + (mult:SI + (match_dup 3) + (const_int 8)))))] + "TARGET_REALLY_IWMMXT" + "wmerge%?\\t%0, %1, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmerge")] +) + +(define_insn "iwmmxt_tandc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TANDC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TANDC)] + "TARGET_REALLY_IWMMXT" + "tandc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tandc")] +) + +(define_insn "iwmmxt_torc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TORC)] + "TARGET_REALLY_IWMMXT" + "torc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_torc")] +) + +(define_insn "iwmmxt_torvsc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORVSC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TORVSC)] + "TARGET_REALLY_IWMMXT" + "torvsc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_torvsc")] +) + +(define_insn "iwmmxt_textrc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0) + (match_operand:SI 0 "immediate_operand" "i")] UNSPEC_TEXTRC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TEXTRC)] + "TARGET_REALLY_IWMMXT" + "textrc%?\\t r15, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrc")] +) diff --git a/gcc-4.9/gcc/config/arm/ldmstm.md b/gcc-4.9/gcc/config/arm/ldmstm.md new file mode 100644 index 000000000..1a2429071 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/ldmstm.md @@ -0,0 +1,1225 @@ +/* ARM ldm/stm instruction patterns. This file was automatically generated + using arm-ldmstm.ml. Please do not edit manually. + + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +(define_insn "*ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*stm4_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 
"arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm4_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4")]) + +(define_insn "*ldm4_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 
"s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "stm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%5, {%1, %2, %3, %4}" 
+ [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "stm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm4_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -16))) + (match_operand:SI 1 
"arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_operand:SI 3 "s_register_operand" 
""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*stm3_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + 
(set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3")]) + +(define_insn "*ldm3_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da" + [(match_parallel 0 
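+;; Only ARM state has the ib and da address modes, so those patterns are
+;; guarded by TARGET_ARM; the ia and db forms are available to all 32-bit
+;; cores (TARGET_32BIT) and have separate low-register variants for Thumb-1
+;; (TARGET_THUMB1).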
"load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm3_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 
"arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set 
(match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*stm2_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" 
"yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2")]) + +(define_insn "*ldm2_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "stm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%3!, 
{%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "stm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm2_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" 
""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 2, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")])) + (clobber (reg:CC CC_REGNUM))])] + "((((REGNO (operands[6]) == REGNO (operands[0])) + && (REGNO (operands[7]) == REGNO (operands[1]))) + || ((REGNO (operands[7]) == REGNO (operands[0])) + && (REGNO (operands[6]) == REGNO (operands[1])))) + && (peep2_regno_dead_p (3, REGNO (operands[0])) + || (REGNO (operands[0]) == REGNO (operands[4]))) + && (peep2_regno_dead_p (3, REGNO (operands[1])) + || (REGNO (operands[1]) == REGNO (operands[4]))))" + [(parallel + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")]))] + "((((REGNO (operands[6]) == REGNO (operands[0])) + && (REGNO (operands[7]) == REGNO (operands[1]))) + || ((REGNO (operands[7]) == REGNO (operands[0])) + && (REGNO (operands[6]) == REGNO (operands[1])))) + && (peep2_regno_dead_p (3, REGNO (operands[0])) + || (REGNO (operands[0]) == REGNO (operands[4]))) + && (peep2_regno_dead_p (3, REGNO (operands[1])) + || (REGNO (operands[1]) == REGNO (operands[4]))))" + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + diff --git a/gcc-4.9/gcc/config/arm/ldrdstrd.md b/gcc-4.9/gcc/config/arm/ldrdstrd.md new file mode 100644 index 000000000..064033aaa --- /dev/null +++ b/gcc-4.9/gcc/config/arm/ldrdstrd.md @@ -0,0 +1,260 @@ +;; ARM ldrd/strd peephole optimizations. 
+;; +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; +;; Written by Greta Yorsh + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The following peephole optimizations identify consecutive memory +;; accesses, and try to rearrange the operands to enable generation of +;; ldrd/strd. + +(define_peephole2 ; ldrd + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, true, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 0) (match_dup 2))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "arm_general_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "arm_general_register_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 2) (match_dup 0))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following peepholes reorder registers to enable LDRD/STRD. 
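[Editor's note - illustrative example, not part of the patch.] The peepholes in this file fire on two adjacent SImode memory accesses when the selected tuning sets current_tune->prefer_ldrd_strd and the function is not optimized for size. A hypothetical C fragment of the kind that yields such adjacent accesses is sketched below (assuming a tuning that prefers LDRD/STRD, e.g. -mtune=cortex-a15, and -O2); whether the accesses are actually combined still depends on register allocation and address offsets.

    /* Editor's sketch: adjacent word loads/stores that the ldrd/strd
       peepholes above can combine into LDRD/STRD.  */
    struct pair { int lo; int hi; };

    void
    copy_pair (struct pair *dst, const struct pair *src)
    {
      int a = src->lo;   /* two adjacent SImode loads ...  */
      int b = src->hi;
      dst->lo = a;       /* ... and two adjacent SImode stores.  */
      dst->hi = b;
    }
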
+(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following two peephole optimizations are only relevant for ARM +;; mode where LDRD/STRD require consecutive registers. + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. 
+ [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ]))] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation that sets the flags. + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ])) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (parallel + [(set (match_dup 4) + (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +;; TODO: Handle LDRD/STRD with writeback: +;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY +;; (b) Patterns may be followed by an update of the base address. diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h new file mode 100644 index 000000000..f1f3448f1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-eabi.h @@ -0,0 +1,122 @@ +/* Configuration file for ARM GNU/Linux EABI targets. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (false) + +/* We default to a soft-float ABI so that binaries can run on all + target hardware. If you override this to use the hard-float ABI then + change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Default to armv5t so that thumb shared libraries work. + The ARM10TDMI core is the default for armv5t, so set + SUBTARGET_CPU_DEFAULT to achieve this. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#undef TARGET_LINKER_EMULATION +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_LINKER_EMULATION "armelfb_linux_eabi" +#else +#define TARGET_LINKER_EMULATION "armelf_linux_eabi" +#endif + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION + +/* GNU/Linux on ARM currently supports three dynamic linkers: + - ld-linux.so.2 - for the legacy ABI + - ld-linux.so.3 - for the EABI-derived soft-float ABI + - ld-linux-armhf.so.3 - for the EABI-derived hard-float ABI. + All the dynamic linkers live in /lib. + We default to soft-float, but this can be overridden by changing both + GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ + +#undef GLIBC_DYNAMIC_LINKER +#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" +#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" +#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT + +#define GLIBC_DYNAMIC_LINKER \ + "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \ + %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ + %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + +/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. 
*/ +#undef LINK_SPEC +#define LINK_SPEC EABI_LINK_SPEC \ + LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ + LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef ASAN_CC1_SPEC +#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}" + +#undef CC1_SPEC +#define CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC " " \ + ANDROID_CC1_SPEC) + +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we + do not use -lfloat. */ +#undef LIBGCC_SPEC + +/* Clear the instruction cache from `beg' to `end'. This is + implemented in lib1funcs.S, so ensure an error if this definition + is used. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) not_used + +#define ARM_TARGET2_DWARF_FORMAT (DW_EH_PE_pcrel | DW_EH_PE_indirect) diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h new file mode 100644 index 000000000..5dc3328e8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-elf.h @@ -0,0 +1,115 @@ +/* Definitions for ARM running Linux-based GNU systems using ELF + Copyright (C) 1993-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#define TARGET_ENDIAN_OPTION "mbig-endian" +#define TARGET_LINKER_EMULATION "armelfb_linux" +#else +#define TARGET_ENDIAN_DEFAULT 0 +#define TARGET_ENDIAN_OPTION "mlittle-endian" +#define TARGET_LINKER_EMULATION "armelf_linux" +#endif + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (TARGET_ENDIAN_DEFAULT) + +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6 + +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" + +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ +#undef MULTILIB_DEFAULTS + +/* Now we define the strings used to build the spec file. 
*/ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + SUBTARGET_EXTRA_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +/* This is how we tell the assembler that two symbols have the same value. */ +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } \ + while (0) + +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + +/* Call the function profiler with a given profile label. */ +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tbl\tmcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) ? "(PLT)" : ""); \ +} + +/* The GNU/Linux profiler clobbers the link register. Make sure the + prologue knows to save it. */ +#define PROFILE_HOOK(X) \ + emit_clobber (gen_rtx_REG (SImode, LR_REGNUM)) + +/* The GNU/Linux profiler needs a frame pointer. */ +#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile + +/* Add .note.GNU-stack. */ +#undef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h new file mode 100644 index 000000000..52a739c26 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-gas.h @@ -0,0 +1,55 @@ +/* Definitions of target machine for GNU compiler. + ARM Linux-based GNU systems version. + Copyright (C) 1997-2014 Free Software Foundation, Inc. + Contributed by Russell King . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is how we tell the assembler that a symbol is weak. + GAS always supports weak symbols. */ + +/* Unsigned chars produces much better code than signed. */ +#define DEFAULT_SIGNED_CHAR 0 + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Use the AAPCS type for wchar_t, or the previous Linux default for + non-AAPCS. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long int") + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. 
*/ +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg)); \ +} diff --git a/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md b/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md new file mode 100644 index 000000000..9968803ca --- /dev/null +++ b/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md @@ -0,0 +1,189 @@ +;; Marvell WMMX2 pipeline description +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; Written by Marvell, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_automaton "marvell_f_iwmmxt") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This is a 7-stage pipelines: +;; +;; MD | MI | ME1 | ME2 | ME3 | ME4 | MW +;; +;; There are various bypasses modelled to a greater or lesser extent. +;; +;; Latencies in this file correspond to the number of cycles after +;; the issue stage that it takes for the result of the instruction to +;; be computed, or for its side-effects to occur. + +(define_cpu_unit "mf_iwmmxt_MD" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_MI" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME1" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME2" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME3" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME4" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_MW" "marvell_f_iwmmxt") + +(define_reservation "mf_iwmmxt_ME" + "mf_iwmmxt_ME1,mf_iwmmxt_ME2,mf_iwmmxt_ME3,mf_iwmmxt_ME4" +) + +(define_reservation "mf_iwmmxt_pipeline" + "mf_iwmmxt_MD, mf_iwmmxt_MI, mf_iwmmxt_ME, mf_iwmmxt_MW" +) + +;; An attribute to indicate whether our reservations are applicable. 
+(define_attr "marvell_f_iwmmxt" "yes,no" + (const (if_then_else (symbol_ref "arm_arch_iwmmxt") + (const_string "yes") (const_string "no")))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; instruction classes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; An attribute appended to instructions for classification + +(define_attr "wmmxt_shift" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_pack" "yes,no" + (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\ + wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\ + wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_mult_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\ + wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\ + wmmx_wqmulwm") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_mult_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\ + wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\ + wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\ + wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\ + wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c3" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wsad") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\ + wmmx_tmcr, wmmx_tmcrr") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\ + wmmx_tmrc, wmmx_tmrrc") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c3" "yes,no" + (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy") + (const_string "yes") (const_string "no")) +) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Main description +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "marvell_f_iwmmxt_alu_c1" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c1" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_pack" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_pack" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_shift" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_shift" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c1" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c1" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c2" 5 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_alu_c2" 2 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_alu_c3" 3 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c3" "yes")) + 
"mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c3" 4 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c3" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_mult_c1" 4 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_mult_c1" "yes")) + "mf_iwmmxt_pipeline") + +;There is a forwarding path from ME3 stage +(define_insn_reservation "marvell_f_iwmmxt_mult_c2" 3 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_mult_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_wstr" 0 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "type" "wmmx_wstr")) + "mf_iwmmxt_pipeline") + +;There is a forwarding path from MW stage +(define_insn_reservation "marvell_f_iwmmxt_wldr" 5 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "type" "wmmx_wldr")) + "mf_iwmmxt_pipeline") diff --git a/gcc-4.9/gcc/config/arm/marvell-pj4.md b/gcc-4.9/gcc/config/arm/marvell-pj4.md new file mode 100644 index 000000000..0b9d6ebad --- /dev/null +++ b/gcc-4.9/gcc/config/arm/marvell-pj4.md @@ -0,0 +1,232 @@ +;; Marvell ARM Processor Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Marvell. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Pipeline description for the Marvell PJ4, aka "Flareon". 
+(define_automaton "pj4") + +;; Issue resources +(define_cpu_unit "pj4_is1,pj4_is2" "pj4") +(define_reservation "pj4_is" "(pj4_is1|pj4_is2)") +(define_reservation "pj4_isb" "(pj4_is1+pj4_is2)") + +;; Functional units +(define_cpu_unit "pj4_alu1,pj4_alu2,pj4_mul,pj4_div" "pj4") + +;; Completion ports +(define_cpu_unit "pj4_w1,pj4_w2" "pj4") + +;; Complete/Retire control +(define_cpu_unit "pj4_c1,pj4_c2" "pj4") +(define_reservation "pj4_cp" "(pj4_c1|pj4_c2)") +(define_reservation "pj4_cpb" "(pj4_c1+pj4_c2)") + +;; Integer arithmetic instructions + +(define_insn_reservation "pj4_alu_e1" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (not (eq_attr "conds" "set"))) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_e1_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (eq_attr "conds" "set")) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") + (not (eq_attr "conds" "set"))) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") + (eq_attr "conds" "set")) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_shift" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") + (not (eq_attr "conds" "set")) + (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_shift_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") + (eq_attr "conds" "set") + (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_shift" 1 + (and (eq_attr "tune" "marvell_pj4") + (not (eq_attr "conds" "set")) + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) + "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_shift_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "conds" "set") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) + "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") + +(define_bypass 2 "pj4_alu_shift,pj4_shift" + "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") + +(define_insn_reservation "pj4_ir_mul" 3 + (and (eq_attr "tune" "marvell_pj4") + (ior (eq_attr 
"mul32" "yes") + (eq_attr "mul64" "yes"))) + "pj4_is,pj4_mul,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_ir_div" 20 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") + +;; Branches and calls. + +(define_insn_reservation "pj4_branches" 0 + (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "branch")) "pj4_is") + +(define_insn_reservation "pj4_calls" 32 + (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "call")) "pj4_is") + +;; Load/store instructions + +(define_insn_reservation "pj4_ldr" 3 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load_byte,load1")) + "pj4_is,pj4_alu1,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_ldrd" 3 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load2")) + "pj4_is,pj4_alu1,nothing*2,pj4_cpb") + +(define_insn_reservation "pj4_str" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store1")) + "pj4_is,pj4_alu1,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_strd" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store2")) + "pj4_is,pj4_alu1,nothing*2,pj4_cpb") + +(define_insn_reservation "pj4_ldm" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load3,load4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp") + +(define_insn_reservation "pj4_stm" 2 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store3,store4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp") + +;; Loads forward at WR-stage to ALU pipes +(define_bypass 2 "pj4_ldr,pj4_ldrd" "pj4_alu") +(define_bypass 2 "pj4_ldr,pj4_ldrd" "pj4_alu_shift" "arm_no_early_alu_shift_dep") + +(define_bypass 4 "pj4_ldr,pj4_ldrd" "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") +(define_bypass 5 "pj4_ldm" "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") + +;; Loads to stores can back-to-back forward +(define_bypass 1 "pj4_ldr,pj4_ldrd" "pj4_str,pj4_strd" "arm_no_early_store_addr_dep") + +;; PJ4 VFP floating point unit +(define_automaton "pj4_vfp") + +(define_cpu_unit "vissue" "pj4_vfp") +(define_cpu_unit "vadd" "pj4_vfp") +(define_cpu_unit "vmul" "pj4_vfp") +(define_cpu_unit "vdiv" "pj4_vfp") +(define_cpu_unit "vfast" "pj4_vfp") + +(define_insn_reservation "pj4_vfp_add" 5 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fadds,faddd")) "pj4_is,nothing*2,vissue,vadd,nothing*3") + +(define_insn_reservation "pj4_vfp_mul" 6 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmuls,fmuld")) "pj4_is,nothing*2,vissue,vmul,nothing*4") + +(define_insn_reservation "pj4_vfp_divs" 20 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fdivs, fsqrts")) "pj4_is,nothing*2,vissue,vdiv*18,nothing") + +(define_insn_reservation "pj4_vfp_divd" 34 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fdivd, fsqrtd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") + +(define_insn_reservation "pj4_vfp_mac" 9 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmacs,fmacd")) + "pj4_is,nothing*2,vissue,vmul,nothing*3,vadd,nothing*3") + +(define_bypass 5 "pj4_vfp_mac" "pj4_vfp_mac" "arm_no_early_mul_dep") + +(define_insn_reservation "pj4_vfp_cpy" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,\ + fcmps,fcmpd,f_cvt,f_cvtf2i,f_cvti2f")) +"pj4_is,nothing*2,vissue,vfast,nothing*2") + +;; Enlarge latency, and wish that more nondependent insns are +;; scheduled immediately after VFP load. 
+(define_insn_reservation "pj4_vfp_load" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_loads,f_loadd")) "pj4_isb,pj4_alu1,nothing,vissue,pj4_cp") + +(define_insn_reservation "pj4_vfp_store" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_stores,f_stored")) "pj4_isb,pj4_alu1,nothing,vissue,pj4_cp") + +(define_insn_reservation "pj4_vfp_to_core" 7 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_mrc,f_mrrc,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") + +(define_insn_reservation "pj4_core_to_vfp" 2 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_mcr,f_mcrr")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") + diff --git a/gcc-4.9/gcc/config/arm/mmintrin.h b/gcc-4.9/gcc/config/arm/mmintrin.h new file mode 100644 index 000000000..b906faca4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/mmintrin.h @@ -0,0 +1,1836 @@ +/* Copyright (C) 2002-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _MMINTRIN_H_INCLUDED +#define _MMINTRIN_H_INCLUDED + +#ifndef __IWMMXT__ +#error mmintrin.h included without enabling WMMX/WMMX2 instructions (e.g. -march=iwmmxt or -march=iwmmxt2) +#endif + + +#if defined __cplusplus +extern "C" { +/* Intrinsics use C name-mangling. */ +#endif /* __cplusplus */ + +/* The data type intended for user use. */ +typedef unsigned long long __m64, __int64; + +/* Internal data types for implementing the intrinsics. */ +typedef int __v2si __attribute__ ((vector_size (8))); +typedef short __v4hi __attribute__ ((vector_size (8))); +typedef signed char __v8qi __attribute__ ((vector_size (8))); + +/* Provided for source compatibility with MMX. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ +} + +/* "Convert" __m64 and __int64 into each other. */ +static __inline __m64 +_mm_cvtsi64_m64 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtm64_si64 (__m64 __i) +{ + return __i; +} + +static __inline int +_mm_cvtsi64_si32 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtsi32_si64 (int __i) +{ + return (__i & 0xffffffff); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with signed saturation. */ +static __inline __m64 +_mm_packs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with signed saturation. 
*/ +static __inline __m64 +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + signed saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pi64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdss ((long long)__m1, (long long)__m2); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 into the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + unsigned saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pu64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdus ((long long)__m1, (long long)__m2); +} + +/* Interleave the four 8-bit values from the high half of M1 with the four + 8-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the high half of M1 with the two + 16-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the high half of M1 with the 32-bit + value from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihw ((__v2si)__m1, (__v2si)__m2); +} + +/* Interleave the four 8-bit values from the low half of M1 with the four + 8-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the low half of M1 with the two + 16-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the low half of M1 with the 32-bit + value from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilw ((__v2si)__m1, (__v2si)__m2); +} + +/* Take the four 8-bit values from the low half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. 
*/ +static __inline __m64 +_mm_unpackel_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the low half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuw ((__v2si)__m1); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2. */ +static __inline __m64 +_mm_add_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2. */ +static __inline __m64 +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2. */ +static __inline __m64 +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddw ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed + saturated arithmetic. 
*/ +static __inline __m64 +_mm_adds_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ +static __inline __m64 +_mm_sub_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ +static __inline __m64 +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ +static __inline __m64 +_mm_sub_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubw ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed + saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. 
*/ +static __inline __m64 +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmadds ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +static __inline __m64 +_mm_madd_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmaddu ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulsm ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulum ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce + the low 16 bits of the results. */ +static __inline __m64 +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulul ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Shift four 16-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_slli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_slli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT. */ +static __inline __m64 +_mm_sll_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wslld (__m, __count); +} + +static __inline __m64 +_mm_slli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wslldi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrah ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srai_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrahi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsraw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srai_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrawi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrad (__m, __count); +} + +static __inline __m64 +_mm_srai_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsradi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in zeros. 
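+   This is a logical shift: a lane holding 0x8000 shifted right by one
+   becomes 0x4000, whereas the _mm_sra_* forms above would yield 0xc000.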
*/ +static __inline __m64 +_mm_srl_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrld (__m, __count); +} + +static __inline __m64 +_mm_srli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrldi (__m, __count); +} + +/* Rotate four 16-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_rori_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorhi ((__v4hi)__m, __count); +} + +/* Rotate two 32-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_rori_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorwi ((__v2si)__m, __count); +} + +/* Rotate two 64-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrord (__m, __count); +} + +static __inline __m64 +_mm_rori_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrordi (__m, __count); +} + +/* Bit-wise AND the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wand (__m1, __m2); +} + +/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the + 64-bit value in M2. */ +static __inline __m64 +_mm_andnot_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wandn (__m2, __m1); +} + +/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wor (__m1, __m2); +} + +/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wxor (__m1, __m2); +} + +/* Compare eight 8-bit values. The result of the comparison is 0xFF if the + test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtub ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Compare four 16-bit values. The result of the comparison is 0xFFFF if + the test is true and zero if false. 
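+   The all-ones/all-zeros result is convenient as a select mask; a sketch
+   of the usual idiom (placeholder variables, not an API defined by this
+   header):
+
+     __m64 __mask = _mm_cmpgt_pi16 (__a, __b);
+     __m64 __sel  = _mm_or_si64 (_mm_and_si64 (__mask, __a),
+                                 _mm_andnot_si64 (__mask, __b));
+
+   which keeps the signed-greater 16-bit element of each lane.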
*/ +static __inline __m64 +_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if + the test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuw ((__v2si)__m1, (__v2si)__m2); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pu16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacu (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pi16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacs (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pu16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacuz ((__v4hi)__A, (__v4hi)__B); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pi16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacsz ((__v4hi)__A, (__v4hi)__B); +} + +/* Accumulate across all unsigned 8-bit values in __A. */ +static __inline __m64 +_mm_acc_pu8 (__m64 __A) +{ + return __builtin_arm_waccb ((__v8qi)__A); +} + +/* Accumulate across all unsigned 16-bit values in __A. */ +static __inline __m64 +_mm_acc_pu16 (__m64 __A) +{ + return __builtin_arm_wacch ((__v4hi)__A); +} + +/* Accumulate across all unsigned 32-bit values in __A. */ +static __inline __m64 +_mm_acc_pu32 (__m64 __A) +{ + return __builtin_arm_waccw ((__v2si)__A); +} + +static __inline __m64 +_mm_mia_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmia (__A, __B, __C); +} + +static __inline __m64 +_mm_miaph_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiaph (__A, __B, __C); +} + +static __inline __m64 +_mm_miabb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabb (__A, __B, __C); +} + +static __inline __m64 +_mm_miabt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabt (__A, __B, __C); +} + +static __inline __m64 +_mm_miatb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatb (__A, __B, __C); +} + +static __inline __m64 +_mm_miatt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatt (__A, __B, __C); +} + +/* Extract one of the elements of A and sign extend. The selector N must + be immediate. 
*/ +#define _mm_extract_pi8(A, N) __builtin_arm_textrmsb ((__v8qi)(A), (N)) +#define _mm_extract_pi16(A, N) __builtin_arm_textrmsh ((__v4hi)(A), (N)) +#define _mm_extract_pi32(A, N) __builtin_arm_textrmsw ((__v2si)(A), (N)) + +/* Extract one of the elements of A and zero extend. The selector N must + be immediate. */ +#define _mm_extract_pu8(A, N) __builtin_arm_textrmub ((__v8qi)(A), (N)) +#define _mm_extract_pu16(A, N) __builtin_arm_textrmuh ((__v4hi)(A), (N)) +#define _mm_extract_pu32(A, N) __builtin_arm_textrmuw ((__v2si)(A), (N)) + +/* Inserts word D into one of the elements of A. The selector N must be + immediate. */ +#define _mm_insert_pi8(A, D, N) \ + ((__m64) __builtin_arm_tinsrb ((__v8qi)(A), (D), (N))) +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_arm_tinsrh ((__v4hi)(A), (D), (N))) +#define _mm_insert_pi32(A, D, N) \ + ((__m64) __builtin_arm_tinsrw ((__v2si)(A), (D), (N))) + +/* Compute the element-wise maximum of signed 8-bit values. */ +static __inline __m64 +_mm_max_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of signed 16-bit values. */ +static __inline __m64 +_mm_max_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of signed 32-bit values. */ +static __inline __m64 +_mm_max_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise maximum of unsigned 8-bit values. */ +static __inline __m64 +_mm_max_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of unsigned 16-bit values. */ +static __inline __m64 +_mm_max_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of unsigned 32-bit values. */ +static __inline __m64 +_mm_max_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of signed 32-bit values. */ +static __inline __m64 +_mm_min_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of unsigned 32-bit values. */ +static __inline __m64 +_mm_min_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuw ((__v2si)__A, (__v2si)__B); +} + +/* Create an 8-bit mask of the signs of 8-bit values. */ +static __inline int +_mm_movemask_pi8 (__m64 __A) +{ + return __builtin_arm_tmovmskb ((__v8qi)__A); +} + +/* Create an 8-bit mask of the signs of 16-bit values. 
*/ +static __inline int +_mm_movemask_pi16 (__m64 __A) +{ + return __builtin_arm_tmovmskh ((__v4hi)__A); +} + +/* Create an 8-bit mask of the signs of 32-bit values. */ +static __inline int +_mm_movemask_pi32 (__m64 __A) +{ + return __builtin_arm_tmovmskw ((__v2si)__A); +} + +/* Return a combination of the four 16-bit values in A. The selector + must be an immediate. */ +#define _mm_shuffle_pi16(A, N) \ + ((__m64) __builtin_arm_wshufh ((__v4hi)(A), (N))) + + +/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2br ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2hr ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2b ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2h ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); +} + +static __inline __m64 +_mm_sada_pu8 (__m64 __A, __m64 __B, __m64 __C) +{ + return (__m64) __builtin_arm_wsadb ((__v2si)__A, (__v8qi)__B, (__v8qi)__C); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); +} + +static __inline __m64 +_mm_sada_pu16 (__m64 __A, __m64 __B, __m64 __C) +{ + return (__m64) __builtin_arm_wsadh ((__v2si)__A, (__v4hi)__B, (__v4hi)__C); +} + + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); +} + +#define _mm_align_si64(__A,__B, N) \ + (__m64) __builtin_arm_walign ((__v8qi) (__A),(__v8qi) (__B), (N)) + +/* Creates a 64-bit zero. */ +static __inline __m64 +_mm_setzero_si64 (void) +{ + return __builtin_arm_wzero (); +} + +/* Set and Get arbitrary iWMMXt Control registers. + Note only registers 0-3 and 8-11 are currently defined, + the rest are reserved. 
*/ + +static __inline void +_mm_setwcx (const int __value, const int __regno) +{ + switch (__regno) + { + case 0: + __asm __volatile ("tmcr wcid, %0" :: "r"(__value)); + break; + case 1: + __asm __volatile ("tmcr wcon, %0" :: "r"(__value)); + break; + case 2: + __asm __volatile ("tmcr wcssf, %0" :: "r"(__value)); + break; + case 3: + __asm __volatile ("tmcr wcasf, %0" :: "r"(__value)); + break; + case 8: + __builtin_arm_setwcgr0 (__value); + break; + case 9: + __builtin_arm_setwcgr1 (__value); + break; + case 10: + __builtin_arm_setwcgr2 (__value); + break; + case 11: + __builtin_arm_setwcgr3 (__value); + break; + default: + break; + } +} + +static __inline int +_mm_getwcx (const int __regno) +{ + int __value; + switch (__regno) + { + case 0: + __asm __volatile ("tmrc %0, wcid" : "=r"(__value)); + break; + case 1: + __asm __volatile ("tmrc %0, wcon" : "=r"(__value)); + break; + case 2: + __asm __volatile ("tmrc %0, wcssf" : "=r"(__value)); + break; + case 3: + __asm __volatile ("tmrc %0, wcasf" : "=r"(__value)); + break; + case 8: + return __builtin_arm_getwcgr0 (); + case 9: + return __builtin_arm_getwcgr1 (); + case 10: + return __builtin_arm_getwcgr2 (); + case 11: + return __builtin_arm_getwcgr3 (); + default: + break; + } + return __value; +} + +/* Creates a vector of two 32-bit values; I0 is least significant. */ +static __inline __m64 +_mm_set_pi32 (int __i1, int __i0) +{ + union + { + __m64 __q; + struct + { + unsigned int __i0; + unsigned int __i1; + } __s; + } __u; + + __u.__s.__i0 = __i0; + __u.__s.__i1 = __i1; + + return __u.__q; +} + +/* Creates a vector of four 16-bit values; W0 is least significant. */ +static __inline __m64 +_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) +{ + unsigned int __i1 = (unsigned short) __w3 << 16 | (unsigned short) __w2; + unsigned int __i0 = (unsigned short) __w1 << 16 | (unsigned short) __w0; + + return _mm_set_pi32 (__i1, __i0); +} + +/* Creates a vector of eight 8-bit values; B0 is least significant. */ +static __inline __m64 +_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, + char __b3, char __b2, char __b1, char __b0) +{ + unsigned int __i1, __i0; + + __i1 = (unsigned char)__b7; + __i1 = __i1 << 8 | (unsigned char)__b6; + __i1 = __i1 << 8 | (unsigned char)__b5; + __i1 = __i1 << 8 | (unsigned char)__b4; + + __i0 = (unsigned char)__b3; + __i0 = __i0 << 8 | (unsigned char)__b2; + __i0 = __i0 << 8 | (unsigned char)__b1; + __i0 = __i0 << 8 | (unsigned char)__b0; + + return _mm_set_pi32 (__i1, __i0); +} + +/* Similar, but with the arguments in reverse order. */ +static __inline __m64 +_mm_setr_pi32 (int __i0, int __i1) +{ + return _mm_set_pi32 (__i1, __i0); +} + +static __inline __m64 +_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16 (__w3, __w2, __w1, __w0); +} + +static __inline __m64 +_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, + char __b4, char __b5, char __b6, char __b7) +{ + return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +/* Creates a vector of two 32-bit values, both elements containing I. */ +static __inline __m64 +_mm_set1_pi32 (int __i) +{ + return _mm_set_pi32 (__i, __i); +} + +/* Creates a vector of four 16-bit values, all elements containing W. */ +static __inline __m64 +_mm_set1_pi16 (short __w) +{ + unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; + return _mm_set1_pi32 (__i); +} + +/* Creates a vector of four 16-bit values, all elements containing B. 
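+   (Concretely, the implementation below replicates B into all eight
+   8-bit lanes: it packs two copies of B into a 16-bit word, two copies
+   of that into a 32-bit word, and broadcasts the result with
+   _mm_set1_pi32.)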
*/ +static __inline __m64 +_mm_set1_pi8 (char __b) +{ + unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; + unsigned int __i = __w << 16 | __w; + return _mm_set1_pi32 (__i); +} + +#ifdef __IWMMXT2__ +static __inline __m64 +_mm_abs_pi8 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsb ((__v8qi)m1); +} + +static __inline __m64 +_mm_abs_pi16 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsh ((__v4hi)m1); + +} + +static __inline __m64 +_mm_abs_pi32 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsw ((__v2si)m1); + +} + +static __inline __m64 +_mm_addsubhx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddsubhx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_absdiff_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffb ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_absdiff_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffh ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_absdiff_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffw ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_addc_pu16 (__m64 a, __m64 b) +{ + __m64 result; + __asm__ __volatile__ ("waddhc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); + return result; +} + +static __inline __m64 +_mm_addc_pu32 (__m64 a, __m64 b) +{ + __m64 result; + __asm__ __volatile__ ("waddwc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); + return result; +} + +static __inline __m64 +_mm_avg4_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wavg4 ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_avg4r_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wavg4r ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_maddx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddsx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_maddx_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddux ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_msub_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddsn ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_msub_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddun ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhi_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwsm ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhi_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwum ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhir_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulsmr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhir_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwsmr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhir_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulumr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhir_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwumr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mullo_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwl ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_qmulm_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulm ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_qmulm_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulwm ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_qmulmr_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulmr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_qmulmr_pi32 (__m64 a, __m64 b) +{ + 
return (__m64) __builtin_arm_wqmulwmr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_subaddhx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wsubaddhx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_addbhusl_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddbhusl ((__v4hi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_addbhusm_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddbhusm ((__v4hi)a, (__v8qi)b); +} + +#define _mm_qmiabb_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabbn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabt_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabtn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc=acc;\ + __m64 _m1=m1;\ + __m64 _m2=m2;\ + _acc = (__m64) __builtin_arm_wqmiabtn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatb_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatbn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatt_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiattn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiattn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabtn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabtn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiattn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 
_m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiattn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbb (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbbn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbt (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbtn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbtn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtb (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtbn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtt (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawttn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawttn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +/* The third arguments should be an immediate. */ +#define _mm_merge_si64(a, b, n) \ + ({\ + __m64 result;\ + result = (__m64) __builtin_arm_wmerge ((__m64) (a), (__m64) (b), (n));\ + result;\ + }) +#endif /* __IWMMXT2__ */ + +static __inline __m64 +_mm_alignr0_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr0 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr1_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr1 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr2_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr2 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr3_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr3 ((__v8qi) a, (__v8qi) b); +} + +static __inline void +_mm_tandcb () +{ + __asm __volatile ("tandcb r15"); +} + +static __inline void +_mm_tandch () +{ + __asm __volatile ("tandch r15"); +} + +static __inline void +_mm_tandcw () +{ + __asm __volatile ("tandcw r15"); +} + +#define _mm_textrcb(n) \ + ({\ + __asm__ __volatile__ (\ + "textrcb r15, %0" : : "i" (n));\ + }) + +#define _mm_textrch(n) \ + ({\ + __asm__ __volatile__ (\ + "textrch r15, %0" : : "i" (n));\ + }) + +#define _mm_textrcw(n) \ + ({\ + __asm__ __volatile__ (\ + "textrcw r15, %0" : : "i" (n));\ + }) + +static __inline void +_mm_torcb () +{ + __asm __volatile ("torcb r15"); +} + +static __inline void +_mm_torch () +{ + __asm __volatile ("torch r15"); +} + +static __inline void +_mm_torcw () +{ + __asm __volatile ("torcw r15"); +} + +#ifdef __IWMMXT2__ +static __inline void +_mm_torvscb () +{ + __asm __volatile ("torvscb r15"); +} + +static __inline void +_mm_torvsch () +{ + __asm __volatile ("torvsch r15"); +} + +static __inline void +_mm_torvscw () +{ + __asm __volatile ("torvscw r15"); +} +#endif /* __IWMMXT2__ */ + +static __inline __m64 +_mm_tbcst_pi8 (int 
value) +{ + return (__m64) __builtin_arm_tbcstb ((signed char) value); +} + +static __inline __m64 +_mm_tbcst_pi16 (int value) +{ + return (__m64) __builtin_arm_tbcsth ((short) value); +} + +static __inline __m64 +_mm_tbcst_pi32 (int value) +{ + return (__m64) __builtin_arm_tbcstw (value); +} + +#define _m_empty _mm_empty +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_packusdw _mm_packs_pu32 +#define _m_packssqd _mm_packs_pi64 +#define _m_packusqd _mm_packs_pu64 +#define _mm_packs_si64 _mm_packs_pi64 +#define _mm_packs_su64 _mm_packs_pu64 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_punpckehsbw _mm_unpackeh_pi8 +#define _m_punpckehswd _mm_unpackeh_pi16 +#define _m_punpckehsdq _mm_unpackeh_pi32 +#define _m_punpckehubw _mm_unpackeh_pu8 +#define _m_punpckehuwd _mm_unpackeh_pu16 +#define _m_punpckehudq _mm_unpackeh_pu32 +#define _m_punpckelsbw _mm_unpackel_pi8 +#define _m_punpckelswd _mm_unpackel_pi16 +#define _m_punpckelsdq _mm_unpackel_pi32 +#define _m_punpckelubw _mm_unpackel_pu8 +#define _m_punpckeluwd _mm_unpackel_pu16 +#define _m_punpckeludq _mm_unpackel_pu32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddsd _mm_adds_pi32 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_paddusd _mm_adds_pu32 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubuw _mm_subs_pi32 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_psubusd _mm_subs_pu32 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmadduwd _mm_madd_pu16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmulhuw _mm_mulhi_pu16 +#define _m_pmullw _mm_mullo_pi16 +#define _m_pmacsw _mm_mac_pi16 +#define _m_pmacuw _mm_mac_pu16 +#define _m_pmacszw _mm_macz_pi16 +#define _m_pmacuzw _mm_macz_pu16 +#define _m_paccb _mm_acc_pu8 +#define _m_paccw _mm_acc_pu16 +#define _m_paccd _mm_acc_pu32 +#define _m_pmia _mm_mia_si64 +#define _m_pmiaph _mm_miaph_si64 +#define _m_pmiabb _mm_miabb_si64 +#define _m_pmiabt _mm_miabt_si64 +#define _m_pmiatb _mm_miatb_si64 +#define _m_pmiatt _mm_miatt_si64 +#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define _m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psraq _mm_sra_si64 +#define _m_psraqi _mm_srai_si64 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_prorw _mm_ror_pi16 +#define _m_prorwi _mm_rori_pi16 +#define _m_prord _mm_ror_pi32 +#define _m_prordi _mm_rori_pi32 +#define _m_prorq _mm_ror_si64 +#define _m_prorqi _mm_rori_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb 
_mm_cmpgt_pi8 +#define _m_pcmpgtub _mm_cmpgt_pu8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtuw _mm_cmpgt_pu16 +#define _m_pcmpgtd _mm_cmpgt_pi32 +#define _m_pcmpgtud _mm_cmpgt_pu32 +#define _m_pextrb _mm_extract_pi8 +#define _m_pextrw _mm_extract_pi16 +#define _m_pextrd _mm_extract_pi32 +#define _m_pextrub _mm_extract_pu8 +#define _m_pextruw _mm_extract_pu16 +#define _m_pextrud _mm_extract_pu32 +#define _m_pinsrb _mm_insert_pi8 +#define _m_pinsrw _mm_insert_pi16 +#define _m_pinsrd _mm_insert_pi32 +#define _m_pmaxsb _mm_max_pi8 +#define _m_pmaxsw _mm_max_pi16 +#define _m_pmaxsd _mm_max_pi32 +#define _m_pmaxub _mm_max_pu8 +#define _m_pmaxuw _mm_max_pu16 +#define _m_pmaxud _mm_max_pu32 +#define _m_pminsb _mm_min_pi8 +#define _m_pminsw _mm_min_pi16 +#define _m_pminsd _mm_min_pi32 +#define _m_pminub _mm_min_pu8 +#define _m_pminuw _mm_min_pu16 +#define _m_pminud _mm_min_pu32 +#define _m_pmovmskb _mm_movemask_pi8 +#define _m_pmovmskw _mm_movemask_pi16 +#define _m_pmovmskd _mm_movemask_pi32 +#define _m_pshufw _mm_shuffle_pi16 +#define _m_pavgb _mm_avg_pu8 +#define _m_pavgw _mm_avg_pu16 +#define _m_pavg2b _mm_avg2_pu8 +#define _m_pavg2w _mm_avg2_pu16 +#define _m_psadbw _mm_sad_pu8 +#define _m_psadwd _mm_sad_pu16 +#define _m_psadzbw _mm_sadz_pu8 +#define _m_psadzwd _mm_sadz_pu16 +#define _m_paligniq _mm_align_si64 +#define _m_cvt_si2pi _mm_cvtsi64_m64 +#define _m_cvt_pi2si _mm_cvtm64_si64 +#define _m_from_int _mm_cvtsi32_si64 +#define _m_to_int _mm_cvtsi64_si32 + +#if defined __cplusplus +}; /* End "C" */ +#endif /* __cplusplus */ + +#endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml new file mode 100644 index 000000000..5788a533e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-docgen.ml @@ -0,0 +1,424 @@ +(* ARM NEON documentation generator. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-docgen neon.cmo neon-docgen.ml + + Run with: + /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi +*) + +open Neon + +(* The combined "ops" and "reinterp" table. *) +let ops_reinterp = reinterp @ ops + +(* Helper functions for extracting things from the "ops" table. 
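+   Each helper below folds over ops_reinterp and collects the rows whose
+   opcode field matches; for instance single_opcode Vadd () returns every
+   table entry describing the vadd intrinsics, and these selectors are
+   what the intrinsic_groups table further down is built from.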
*) +let single_opcode desired_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + if opcode = desired_opcode then row :: got_so_far + else got_so_far + ) [] ops_reinterp + +let multiple_opcodes desired_opcodes () = + List.fold_left (fun got_so_far -> + fun desired_opcode -> + (single_opcode desired_opcode ()) @ got_so_far) + [] desired_opcodes + +let ldx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vldx n | Vldx_lane n | Vldx_dup n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let stx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vstx n | Vstx_lane n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbl_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbl _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbx_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbx _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +(* The groups of intrinsics. *) +let intrinsic_groups = + [ "Addition", single_opcode Vadd; + "Multiplication", single_opcode Vmul; + "Multiply-accumulate", single_opcode Vmla; + "Multiply-subtract", single_opcode Vmls; + "Fused-multiply-accumulate", single_opcode Vfma; + "Fused-multiply-subtract", single_opcode Vfms; + "Round to integral (to nearest, ties to even)", single_opcode Vrintn; + "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta; + "Round to integral (towards +Inf)", single_opcode Vrintp; + "Round to integral (towards -Inf)", single_opcode Vrintm; + "Round to integral (towards 0)", single_opcode Vrintz; + "Subtraction", single_opcode Vsub; + "Comparison (equal-to)", single_opcode Vceq; + "Comparison (greater-than-or-equal-to)", single_opcode Vcge; + "Comparison (less-than-or-equal-to)", single_opcode Vcle; + "Comparison (greater-than)", single_opcode Vcgt; + "Comparison (less-than)", single_opcode Vclt; + "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; + "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; + "Comparison (absolute greater-than)", single_opcode Vcagt; + "Comparison (absolute less-than)", single_opcode Vcalt; + "Test bits", single_opcode Vtst; + "Absolute difference", single_opcode Vabd; + "Absolute difference and accumulate", single_opcode Vaba; + "Maximum", single_opcode Vmax; + "Minimum", single_opcode Vmin; + "Pairwise add", single_opcode Vpadd; + "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; + "Folding maximum", single_opcode Vpmax; + "Folding minimum", single_opcode Vpmin; + "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; + "Vector shift left", single_opcode Vshl; + "Vector shift left by constant", single_opcode Vshl_n; + "Vector shift right by constant", single_opcode Vshr_n; + "Vector shift right by constant and accumulate", single_opcode Vsra_n; + "Vector shift right and insert", single_opcode Vsri; + "Vector shift left and insert", single_opcode Vsli; + "Absolute value", single_opcode Vabs; + "Negation", single_opcode Vneg; + "Bitwise not", single_opcode Vmvn; + "Count leading sign bits", single_opcode Vcls; + "Count leading zeros", single_opcode Vclz; + 
"Count number of set bits", single_opcode Vcnt; + "Reciprocal estimate", single_opcode Vrecpe; + "Reciprocal square-root estimate", single_opcode Vrsqrte; + "Get lanes from a vector", single_opcode Vget_lane; + "Set lanes in a vector", single_opcode Vset_lane; + "Create vector from literal bit pattern", single_opcode Vcreate; + "Set all lanes to the same value", + multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; + "Combining vectors", single_opcode Vcombine; + "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; + "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; + "Move, single_opcode narrowing", single_opcode Vmovn; + "Move, single_opcode long", single_opcode Vmovl; + "Table lookup", tbl_opcode; + "Extended table lookup", tbx_opcode; + "Multiply, lane", single_opcode Vmul_lane; + "Long multiply, lane", single_opcode Vmull_lane; + "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; + "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; + "Multiply-accumulate, lane", single_opcode Vmla_lane; + "Multiply-subtract, lane", single_opcode Vmls_lane; + "Vector multiply by scalar", single_opcode Vmul_n; + "Vector long multiply by scalar", single_opcode Vmull_n; + "Vector saturating doubling long multiply by scalar", + single_opcode Vqdmull_n; + "Vector saturating doubling multiply high by scalar", + single_opcode Vqdmulh_n; + "Vector multiply-accumulate by scalar", single_opcode Vmla_n; + "Vector multiply-subtract by scalar", single_opcode Vmls_n; + "Vector extract", single_opcode Vext; + "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; + "Bit selection", single_opcode Vbsl; + "Transpose elements", single_opcode Vtrn; + "Zip elements", single_opcode Vzip; + "Unzip elements", single_opcode Vuzp; + "Element/structure loads, VLD1 variants", ldx_opcode 1; + "Element/structure stores, VST1 variants", stx_opcode 1; + "Element/structure loads, VLD2 variants", ldx_opcode 2; + "Element/structure stores, VST2 variants", stx_opcode 2; + "Element/structure loads, VLD3 variants", ldx_opcode 3; + "Element/structure stores, VST3 variants", stx_opcode 3; + "Element/structure loads, VLD4 variants", ldx_opcode 4; + "Element/structure stores, VST4 variants", stx_opcode 4; + "Logical operations (AND)", single_opcode Vand; + "Logical operations (OR)", single_opcode Vorr; + "Logical operations (exclusive OR)", single_opcode Veor; + "Logical operations (AND-NOT)", single_opcode Vbic; + "Logical operations (OR-NOT)", single_opcode Vorn; + "Reinterpret casts", single_opcode Vreinterp ] + +(* Given an intrinsic shape, produce a string to document the corresponding + operand shapes. 
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt reg_no elt = + match elt with + Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" + | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" + | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" + | Immed -> "#@var{0}" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt 0 elt in + "@{" ^ elt_regexp ^ "@}" + | VecArray (n, elt) -> + let rec f m = + match m with + 0 -> [] + | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) + in + let ops = List.rev (f n) in + "@{" ^ (commas (fun x -> x) ops "") ^ "@}" + | (PtrTo elt | CstPtrTo elt) -> + "[" ^ (analyze_shape_elt reg_no elt) ^ "]" + | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" + | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" + | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" + | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) + in + match shape with + All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" + | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ + (analyze_shape_elt 0 elt) + | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Qreg) + | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt 0 elt in + let elt_regexp' = analyze_shape_elt 1 elt in + elt_regexp ^ ", " ^ elt_regexp' + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Document a single intrinsic. *) +let describe_intrinsic first chan + (elt_ty, (_, features, shape, name, munge, _)) = + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = strings_of_arity c_arity in + Printf.fprintf chan "@itemize @bullet\n"; + let item_code = if first then "@item" else "@itemx" in + Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) + (intrinsic_name name) (string_of_elt elt_ty); + Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); + if not (List.exists (fun feature -> feature = No_op) features) then + begin + let print_one_insn name = + Printf.fprintf chan "@code{"; + let no_suffix = (new_elt_ty = NoElts) in + let name_with_suffix = + if no_suffix then name + else name ^ "." ^ (string_of_elt_dots new_elt_ty) + in + let possible_operands = analyze_all_shapes features shape + analyze_shape + in + let rec print_one_possible_operand op = + Printf.fprintf chan "%s %s}" name_with_suffix op + in + (* If the intrinsic expands to multiple instructions, we assume + they are all of the same form. 
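+         Only the head of possible_operands is therefore printed.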
*) + print_one_possible_operand (List.hd possible_operands) + in + let rec print_insns names = + match names with + [] -> () + | [name] -> print_one_insn name + | name::names -> (print_one_insn name; + Printf.fprintf chan " @emph{or} "; + print_insns names) + in + let insn_names = get_insn_names features name in + Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; + print_insns insn_names; + Printf.fprintf chan "\n" + end; + Printf.fprintf chan "@end itemize\n"; + Printf.fprintf chan "\n\n" + +(* Document a group of intrinsics. *) +let document_group chan (group_title, group_extractor) = + (* Extract the rows in question from the ops table and then turn them + into a list of intrinsics. *) + let intrinsics = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (_, _, _, _, _, elt_tys) -> + List.fold_left (fun got_so_far' -> + fun elt_ty -> + (elt_ty, row) :: got_so_far') + got_so_far elt_tys + ) [] (group_extractor ()) + in + (* Emit the title for this group. *) + Printf.fprintf chan "@subsubsection %s\n\n" group_title; + (* Emit a description of each intrinsic. *) + List.iter (describe_intrinsic true chan) intrinsics; + (* Close this group. *) + Printf.fprintf chan "\n\n" + +let gnu_header chan = + List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ + "@c Copyright (C) 2006-2014 Free Software Foundation, Inc."; + "@c This is part of the GCC manual."; + "@c For copying conditions, see the file gcc.texi."; + ""; + "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; + "@c Please do not edit manually."] + +let crypto_doc = +" +@itemize @bullet +@item poly128_t vldrq_p128(poly128_t const *) +@end itemize + +@itemize @bullet +@item void vstrq_p128(poly128_t *, poly128_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint32_t vsha1h_u32 (uint32_t) +@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su0.32 
@var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_p64 (poly64_t a, poly64_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize +" + +(* Program entry point. *) +let _ = + if Array.length Sys.argv <> 2 then + failwith "Usage: neon-docgen " + else + let file = Sys.argv.(1) in + try + let chan = open_out file in + gnu_header chan; + List.iter (document_group chan) intrinsic_groups; + Printf.fprintf chan "%s\n" crypto_doc; + close_out chan + with Sys_error sys -> + failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml new file mode 100644 index 000000000..f3dd86b0a --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-gen.ml @@ -0,0 +1,520 @@ +(* Auto-generate ARM Neon intrinsics header file. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-gen neon.cmo neon-gen.ml + + Run with: + ./neon-gen > arm_neon.h +*) + +open Neon + +(* The format codes used in the following functions are documented at: + http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ + #6_printflikefunctionsforprettyprinting + (one line, remove the backslash.) +*) + +(* Following functions can be used to approximate GNU indentation style. 
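+   start_function, open_braceblock, close_braceblock and end_function are
+   combined by print_function so that each emitted intrinsic comes out
+   roughly as
+
+     __extension__ static __inline <ret> __attribute__ ((__always_inline__))
+     <name> (<args>)
+     {
+       <body>
+     }
+
+   with GNU-style brace placement and indentation.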
*) +let start_function () = + Format.printf "@["; + ref 0 + +let end_function nesting = + match !nesting with + 0 -> Format.printf "@;@;@]" + | _ -> failwith ("Bad nesting (ending function at level " + ^ (string_of_int !nesting) ^ ")") + +let open_braceblock nesting = + begin match !nesting with + 0 -> Format.printf "@,@<0>{@[@," + | _ -> Format.printf "@,@[ @<0>{@[@," + end; + incr nesting + +let close_braceblock nesting = + decr nesting; + match !nesting with + 0 -> Format.printf "@]@,@<0>}" + | _ -> Format.printf "@]@,@<0>}@]" + +let print_function arity fnname body = + let ffmt = start_function () in + Format.printf "__extension__ static __inline "; + let inl = "__attribute__ ((__always_inline__))" in + begin match arity with + Arity0 ret -> + Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname + | Arity1 (ret, arg0) -> + Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname + (string_of_vectype arg0) + | Arity2 (ret, arg0, arg1) -> + Format.printf "%s %s@,%s (%s __a, %s __b)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) + | Arity3 (ret, arg0, arg1, arg2) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + | Arity4 (ret, arg0, arg1, arg2, arg3) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + (string_of_vectype arg3) + end; + open_braceblock ffmt; + let rec print_lines = function + [] -> () + | "" :: lines -> print_lines lines + | [line] -> Format.printf "%s" line + | line::lines -> Format.printf "%s@," line ; print_lines lines in + print_lines body; + close_braceblock ffmt; + end_function ffmt + +let union_string num elts base = + let itype = inttype_for_array num elts in + let iname = string_of_inttype itype + and sname = string_of_vectype (T_arrayof (num, elts)) in + Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base + +let rec signed_ctype = function + T_uint8x8 | T_poly8x8 -> T_int8x8 + | T_uint8x16 | T_poly8x16 -> T_int8x16 + | T_uint16x4 | T_poly16x4 -> T_int16x4 + | T_uint16x8 | T_poly16x8 -> T_int16x8 + | T_uint32x2 -> T_int32x2 + | T_uint32x4 -> T_int32x4 + | T_uint64x1 -> T_int64x1 + | T_uint64x2 -> T_int64x2 + | T_poly64x2 -> T_int64x2 + (* Cast to types defined by mode in arm.c, not random types pulled in from + the header in use. This fixes incompatible pointer errors when + compiling with C++. *) + | T_uint8 | T_int8 -> T_intQI + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI + | T_float16 -> T_floatHF + | T_float32 -> T_floatSF + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI + | T_poly64 -> T_intDI + | T_poly128 -> T_intTI + | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) + | T_ptrto elt -> T_ptrto (signed_ctype elt) + | T_const elt -> T_const (signed_ctype elt) + | x -> x + +let add_cast ctype cval = + let stype = signed_ctype ctype in + if ctype <> stype then + Printf.sprintf "(%s) %s" (string_of_vectype stype) cval + else + cval + +let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" + +(* Return a tuple of a list of declarations to go at the start of the function, + and a list of statements needed to return THING. 
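+   For a T_arrayof return type the declarations introduce the
+   union { <array type> __i; <wide integer> __o; } __rv trick built by
+   union_string above, and the statements store THING in __rv.__o and
+   return __rv.__i; other non-void return types simply get the result of
+   cast_for_return prepended to THING, while T_void just executes it.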
*) +let return arity thing = + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + begin match ret with + T_arrayof (num, vec) -> + let uname = union_string num vec "__rv" in + [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] + | T_void -> + [], [thing ^ ";"] + | _ -> + [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] + end + +let mask_shape_for_shuffle = function + All (num, reg) -> All (num, reg) + | Pair_result reg -> All (2, reg) + | _ -> failwith "mask_for_shuffle" + +let mask_elems shuffle shape elttype part = + let elem_size = elt_width elttype in + let num_elems = + match regmap shape 0 with + Dreg -> 64 / elem_size + | Qreg -> 128 / elem_size + | _ -> failwith "mask_elems" in + shuffle elem_size num_elems part + +(* Return a tuple of a list of declarations 0and a list of statements needed + to implement an intrinsic using __builtin_shuffle. SHUFFLE is a function + which returns a list of elements suitable for using as a mask. *) + +let shuffle_fn shuffle shape arity elttype = + let mshape = mask_shape_for_shuffle shape in + let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in + let masktype_str = string_of_vectype masktype in + let shuffle_res = type_for_elt mshape elttype 0 in + let shuffle_res_str = string_of_vectype shuffle_res in + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + begin match ret with + T_arrayof (num, vec) -> + let elems1 = mask_elems shuffle mshape elttype `lo + and elems2 = mask_elems shuffle mshape elttype `hi in + let mask1 = (String.concat ", " (List.map string_of_int elems1)) + and mask2 = (String.concat ", " (List.map string_of_int elems2)) in + let shuf1 = Printf.sprintf + "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" + shuffle_res_str masktype_str mask1 + and shuf2 = Printf.sprintf + "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" + shuffle_res_str masktype_str mask2 in + [Printf.sprintf "%s __rv;" (string_of_vectype ret);], + [shuf1; shuf2; "return __rv;"] + | _ -> + let elems = mask_elems shuffle mshape elttype `lo in + let mask = (String.concat ", " (List.map string_of_int elems)) in + let shuf = Printf.sprintf + "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in + [""], + [shuf] + end + +let rec element_type ctype = + match ctype with + T_arrayof (_, v) -> element_type v + | _ -> ctype + +let params ps = + let pdecls = ref [] in + let ptype t p = + match t with + T_arrayof (num, elts) -> + let uname = union_string num elts (p ^ "u") in + let decl = Printf.sprintf "%s = { %s };" uname p in + pdecls := decl :: !pdecls; + p ^ "u.__o" + | _ -> add_cast t p in + let plist = match ps with + Arity0 _ -> [] + | Arity1 (_, t1) -> [ptype t1 "__a"] + | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] + | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] + | Arity4 (_, t1, t2, t3, t4) -> + [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in + !pdecls, plist + +let modify_params features plist = + let is_flipped = + List.exists (function Flipped _ -> true | _ -> false) features in + if is_flipped then + match plist with + [ a; b ] -> [ b; a ] + | _ -> + failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) + else + plist + +(* !!! Decide whether to add an extra information word based on the shape + form. 
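+   The extra word is the "magic" constant appended as a trailing argument
+   to the builtin call: it is always added for the shapes listed in the
+   match below, and otherwise only when the InfoWord feature is present.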
*) +let extra_word shape features paramlist bits = + let use_word = + match shape with + All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow + | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm + | Narrow_imm -> true + | _ -> List.mem InfoWord features + in + if use_word then + paramlist @ [string_of_int bits] + else + paramlist + +(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). + Bit 1 represents floats & polynomials (1), or ordinary integers (0). + Bit 2 represents rounding (1) vs none (0). *) +let infoword_value elttype features = + let bits01 = + match elt_class elttype with + Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 + | Poly -> 0b010 + | Float -> 0b011 + | _ -> 0b000 + and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in + bits01 lor rounding_bit + +(* "Cast" type operations will throw an exception in mode_of_elt (actually in + elt_width, called from there). Deal with that here, and generate a suffix + with multiple modes (). *) +let rec mode_suffix elttype shape = + try + let mode = mode_of_elt elttype shape in + string_of_mode mode + with MixedMode (dst, src) -> + let dstmode = mode_of_elt ~argpos:0 dst shape + and srcmode = mode_of_elt ~argpos:1 src shape in + string_of_mode dstmode ^ string_of_mode srcmode + +let get_shuffle features = + try + match List.find (function Use_shuffle _ -> true | _ -> false) features with + Use_shuffle fn -> Some fn + | _ -> None + with Not_found -> None + +let print_feature_test_start features = + try + match List.find (fun feature -> + match feature with Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true + | _ -> false) + features with + Requires_feature feature -> + Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature + | Requires_arch arch -> + Format.printf "#if __ARM_ARCH >= %d@\n" arch + | Requires_FP_bit bit -> + Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" + (1 lsl bit) + | _ -> assert false + with Not_found -> assert true + +let print_feature_test_end features = + let feature = + List.exists (function Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true + | _ -> false) features in + if feature then Format.printf "#endif@\n" + + +let print_variant opcode features shape name (ctype, asmtype, elttype) = + let bits = infoword_value elttype features in + let modesuf = mode_suffix elttype shape in + let pdecls, paramlist = params ctype in + let rdecls, stmts = + match get_shuffle features with + Some shuffle -> shuffle_fn shuffle shape ctype elttype + | None -> + let paramlist' = modify_params features paramlist in + let paramlist'' = extra_word shape features paramlist' bits in + let parstr = String.concat ", " paramlist'' in + let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" + (builtin_name features name) modesuf parstr in + return ctype builtin in + let body = pdecls @ rdecls @ stmts + and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in + begin + print_feature_test_start features; + print_function ctype fnname body; + print_feature_test_end features; + end + +(* When this function processes the element types in the ops table, it rewrites + them in a list of tuples (a,b,c): + a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) + b : Asm type : a single, processed element type, e.g. P16. This is the + type which should be attached to the asm opcode. + c : Variant type : the unprocessed type for this variant (e.g. 
in add + instructions which don't care about the sign, b might be i16 and c + might be s16.) +*) + +let print_op (opcode, features, shape, name, munge, types) = + let sorted_types = List.sort compare types in + let munged_types = List.map + (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in + List.iter + (fun variant -> print_variant opcode features shape name variant) + munged_types + +let print_ops ops = + List.iter print_op ops + +(* Output type definitions. Table entries are: + cbase : "C" name for the type. + abase : "ARM" base name for the type (i.e. int in int8x8_t). + esize : element size. + enum : element count. + alevel: architecture level at which available. +*) + +type fpulevel = CRYPTO | ALL + +let deftypes () = + let typeinfo = [ + (* Doubleword vector types. *) + "__builtin_neon_qi", "int", 8, 8, ALL; + "__builtin_neon_hi", "int", 16, 4, ALL; + "__builtin_neon_si", "int", 32, 2, ALL; + "__builtin_neon_di", "int", 64, 1, ALL; + "__builtin_neon_hf", "float", 16, 4, ALL; + "__builtin_neon_sf", "float", 32, 2, ALL; + "__builtin_neon_poly8", "poly", 8, 8, ALL; + "__builtin_neon_poly16", "poly", 16, 4, ALL; + "__builtin_neon_poly64", "poly", 64, 1, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 8, ALL; + "__builtin_neon_uhi", "uint", 16, 4, ALL; + "__builtin_neon_usi", "uint", 32, 2, ALL; + "__builtin_neon_udi", "uint", 64, 1, ALL; + + (* Quadword vector types. *) + "__builtin_neon_qi", "int", 8, 16, ALL; + "__builtin_neon_hi", "int", 16, 8, ALL; + "__builtin_neon_si", "int", 32, 4, ALL; + "__builtin_neon_di", "int", 64, 2, ALL; + "__builtin_neon_sf", "float", 32, 4, ALL; + "__builtin_neon_poly8", "poly", 8, 16, ALL; + "__builtin_neon_poly16", "poly", 16, 8, ALL; + "__builtin_neon_poly64", "poly", 64, 2, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 16, ALL; + "__builtin_neon_uhi", "uint", 16, 8, ALL; + "__builtin_neon_usi", "uint", 32, 4, ALL; + "__builtin_neon_udi", "uint", 64, 2, ALL + ] in + List.iter + (fun (cbase, abase, esize, enum, fpulevel) -> + let attr = + match enum with + 1 -> "" + | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" + (esize * enum / 8) in + if fpulevel == CRYPTO then + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr; + if fpulevel == CRYPTO then + Format.printf "#endif\n";) + typeinfo; + Format.print_newline (); + (* Extra types not in . *) + Format.printf "typedef float float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"; + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef __builtin_neon_poly64 poly64_t;\n"; + Format.printf "typedef __builtin_neon_poly128 poly128_t;\n"; + Format.printf "#endif\n" + +(* Output structs containing arrays, for load & store instructions etc. + poly128_t is deliberately not included here because it has no array types + defined for it. 
*) + +let arrtypes () = + let typeinfo = [ + "int", 8, ALL; "int", 16, ALL; + "int", 32, ALL; "int", 64, ALL; + "uint", 8, ALL; "uint", 16, ALL; + "uint", 32, ALL; "uint", 64, ALL; + "float", 32, ALL; "poly", 8, ALL; + "poly", 16, ALL; "poly", 64, CRYPTO + ] in + let writestruct elname elsize regsize arrsize fpulevel = + let elnum = regsize / elsize in + let structname = + Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in + let sfmt = start_function () in + Format.printf "%stypedef struct %s" + (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname; + open_braceblock sfmt; + Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; + close_braceblock sfmt; + Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else ""); + end_function sfmt; + in + for n = 2 to 4 do + List.iter + (fun (elname, elsize, alevel) -> + writestruct elname elsize 64 n alevel; + writestruct elname elsize 128 n alevel) + typeinfo + done + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM NEON intrinsics include file. This file is generated automatically"; +" using neon-gen.ml. Please do not edit manually."; +""; +" Copyright (C) 2006-2014 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" Under Section 7 of GPL version 3, you are granted additional"; +" permissions described in the GCC Runtime Library Exception, version"; +" 3.1, as published by the Free Software Foundation."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . */"; +""; +"#ifndef _GCC_ARM_NEON_H"; +"#define _GCC_ARM_NEON_H 1"; +""; +"#ifndef __ARM_NEON__"; +"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; +"#else"; +""; +"#ifdef __cplusplus"; +"extern \"C\" {"; +"#endif"; +""; +"#include "; +""]; + deftypes (); + arrtypes (); + Format.print_newline (); + print_ops ops; + Format.print_newline (); + print_ops reinterp; + print_ops reinterpq; + Format.printf "%s" crypto_intrinsics; + print_lines [ +"#ifdef __cplusplus"; +"}"; +"#endif"; +"#endif"; +"#endif"] diff --git a/gcc-4.9/gcc/config/arm/neon-testgen.ml b/gcc-4.9/gcc/config/arm/neon-testgen.ml new file mode 100644 index 000000000..df429f59e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-testgen.ml @@ -0,0 +1,305 @@ +(* Auto-generate ARM Neon intrinsics tests. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-testgen neon.cmo neon-testgen.ml + + Run with: + cd /path/to/gcc/testsuite/gcc.target/arm/neon + /path/to/neon-testgen +*) + +open Neon + +type c_type_flags = Pointer | Const + +(* Open a test source file. *) +let open_test_file dir name = + try + open_out (dir ^ "/" ^ name ^ ".c") + with Sys_error str -> + failwith ("Could not create test source file " ^ name ^ ": " ^ str) + +(* Emit prologue code to a test source file. *) +let emit_prologue chan test_name effective_target = + Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name; + Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; + Printf.fprintf chan "/* { dg-do assemble } */\n"; + Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n" + effective_target; + Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; + Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target; + Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; + Printf.fprintf chan "void test_%s (void)\n{\n" test_name + +(* Emit declarations of local variables that are going to be passed + to an intrinsic, together with one to take a returned value if needed. *) +let emit_automatics chan c_types features = + let emit () = + ignore ( + List.fold_left (fun arg_number -> fun (flags, ty) -> + let pointer_bit = + if List.mem Pointer flags then "*" else "" + in + (* Const arguments to builtins are directly + written in as constants. *) + if not (List.mem Const flags) then + Printf.fprintf chan " %s %sarg%d_%s;\n" + ty pointer_bit arg_number ty; + arg_number + 1) + 0 (List.tl c_types)) + in + match c_types with + (_, return_ty) :: tys -> + if return_ty <> "void" then begin + (* The intrinsic returns a value. We need to do explict register + allocation for vget_low tests or they fail because of copy + elimination. *) + ((if List.mem Fixed_vector_reg features then + Printf.fprintf chan " register %s out_%s asm (\"d18\");\n" + return_ty return_ty + else if List.mem Fixed_core_reg features then + Printf.fprintf chan " register %s out_%s asm (\"r0\");\n" + return_ty return_ty + else + Printf.fprintf chan " %s out_%s;\n" return_ty return_ty); + emit ()) + end else + (* The intrinsic does not return a value. *) + emit () + | _ -> assert false + +(* Emit code to call an intrinsic. *) +let emit_call chan const_valuator c_types name elt_ty = + (if snd (List.hd c_types) <> "void" then + Printf.fprintf chan " out_%s = " (snd (List.hd c_types)) + else + Printf.fprintf chan " "); + Printf.fprintf chan "%s_%s (" (intrinsic_name name) (string_of_elt elt_ty); + let print_arg chan arg_number (flags, ty) = + (* If the argument is of const type, then directly write in the + constant now. 
*) + if List.mem Const flags then + match const_valuator with + None -> + if List.mem Pointer flags then + Printf.fprintf chan "0" + else + Printf.fprintf chan "1" + | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number)) + else + Printf.fprintf chan "arg%d_%s" arg_number ty + in + let rec print_args arg_number tys = + match tys with + [] -> () + | [ty] -> print_arg chan arg_number ty + | ty::tys -> + print_arg chan arg_number ty; + Printf.fprintf chan ", "; + print_args (arg_number + 1) tys + in + print_args 0 (List.tl c_types); + Printf.fprintf chan ");\n" + +(* Emit epilogue code to a test source file. *) +let emit_epilogue chan features regexps = + let no_op = List.exists (fun feature -> feature = No_op) features in + Printf.fprintf chan "}\n\n"; + (if not no_op then + List.iter (fun regexp -> + Printf.fprintf chan + "/* { dg-final { scan-assembler \"%s\" } } */\n" regexp) + regexps + else + () + ); + Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n" + +(* Check a list of C types to determine which ones are pointers and which + ones are const. *) +let check_types tys = + let tys' = + List.map (fun ty -> + let len = String.length ty in + if len > 2 && String.get ty (len - 2) = ' ' + && String.get ty (len - 1) = '*' + then ([Pointer], String.sub ty 0 (len - 2)) + else ([], ty)) tys + in + List.map (fun (flags, ty) -> + if String.length ty > 6 && String.sub ty 0 6 = "const " + then (Const :: flags, String.sub ty 6 ((String.length ty) - 6)) + else (flags, ty)) tys' + +(* Work out what the effective target should be. *) +let effective_target features = + try + match List.find (fun feature -> + match feature with Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit 1 -> true + | _ -> false) + features with + Requires_feature "FMA" -> "arm_neonv2" + | Requires_feature "CRYPTO" -> "arm_crypto" + | Requires_arch 8 -> "arm_v8_neon" + | Requires_FP_bit 1 -> "arm_neon_fp16" + | _ -> assert false + with Not_found -> "arm_neon" + +(* Given an intrinsic shape, produce a regexp that will match + the right-hand sides of instructions generated by an intrinsic of + that shape. 
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt elt = + match elt with + Dreg -> "\\[dD\\]\\[0-9\\]+" + | Qreg -> "\\[qQ\\]\\[0-9\\]+" + | Corereg -> "\\[rR\\]\\[0-9\\]+" + | Immed -> "#\\[0-9\\]+" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt elt in + "((\\\\\\{" ^ elt_regexp ^ "\\\\\\})|(" ^ elt_regexp ^ "))" + | VecArray (n, elt) -> + let elt_regexp = analyze_shape_elt elt in + let alt1 = elt_regexp ^ "-" ^ elt_regexp in + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> + "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" + | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")" + in + match shape with + All (n, elt) -> commas analyze_shape_elt (n_things n elt) "" + | Long -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Dreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Long_noreg elt -> (analyze_shape_elt elt) ^ ", " ^ (analyze_shape_elt elt) + | Wide -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt Dreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Qreg) + | Use_operands elts -> commas analyze_shape_elt (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt elt in + elt_regexp ^ ", " ^ elt_regexp + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Generate tests for one intrinsic. *) +let test_intrinsic dir opcode features shape name munge elt_ty = + (* Open the test source file. *) + let test_name = name ^ (string_of_elt elt_ty) in + let chan = open_test_file dir test_name in + (* Work out what argument and return types the intrinsic has. *) + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = check_types (strings_of_arity c_arity) in + (* Extract any constant valuator (a function specifying what constant + values are to be written into the intrinsic call) from the features + list. *) + let const_valuator = + try + match (List.find (fun feature -> match feature with + Const_valuator _ -> true + | _ -> false) features) with + Const_valuator f -> Some f + | _ -> assert false + with Not_found -> None + in + (* Work out what instruction name(s) to expect. *) + let insns = get_insn_names features name in + let no_suffix = (new_elt_ty = NoElts) in + let insns = + if no_suffix then insns + else List.map (fun insn -> + let suffix = string_of_elt_dots new_elt_ty in + insn ^ "\\." 
^ suffix) insns + in + (* Construct a regexp to match against the expected instruction name(s). *) + let insn_regexp = + match insns with + [] -> assert false + | [insn] -> insn + | _ -> + let rec calc_regexp insns cur_regexp = + match insns with + [] -> cur_regexp + | [insn] -> cur_regexp ^ "(" ^ insn ^ "))" + | insn::insns -> calc_regexp insns (cur_regexp ^ "(" ^ insn ^ ")|") + in calc_regexp insns "(" + in + (* Construct regexps to match against the instructions that this + intrinsic expands to. Watch out for any writeback character and + comments after the instruction. *) + let regexps = List.map (fun regexp -> insn_regexp ^ "\\[ \t\\]+" ^ regexp ^ + "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n") + (analyze_all_shapes features shape analyze_shape) + in + let effective_target = effective_target features + in + (* Emit file and function prologues. *) + emit_prologue chan test_name effective_target; + (* Emit local variable declarations. *) + emit_automatics chan c_types features; + Printf.fprintf chan "\n"; + (* Emit the call to the intrinsic. *) + emit_call chan const_valuator c_types name elt_ty; + (* Emit the function epilogue and the DejaGNU scan-assembler directives. *) + emit_epilogue chan features regexps; + (* Close the test file. *) + close_out chan + +(* Generate tests for one element of the "ops" table. *) +let test_intrinsic_group dir (opcode, features, shape, name, munge, types) = + List.iter (test_intrinsic dir opcode features shape name munge) types + +(* Program entry point. *) +let _ = + let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." in + List.iter (test_intrinsic_group directory) (reinterp @ reinterpq @ ops) + diff --git a/gcc-4.9/gcc/config/arm/neon.md b/gcc-4.9/gcc/config/arm/neon.md new file mode 100644 index 000000000..aad420ce7 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon.md @@ -0,0 +1,5808 @@ +;; ARM NEON coprocessor Machine Description +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Attribute used to permit string comparisons against in +;; type attribute definitions. 
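(Roughly what the neon-testgen.ml driver above writes out for a single entry of the ops table. This is a hand-written approximation for a hypothetical vadd_s32 test, not verbatim generator output; the exact DejaGnu directives, variable names and scan-assembler regexp come from emit_prologue, emit_automatics, emit_call and emit_epilogue.)

    /* Test the `vadd_s32' ARM Neon intrinsic.  */
    /* { dg-do assemble } */
    /* { dg-require-effective-target arm_neon_ok } */
    /* { dg-options "-save-temps -O0" } */
    /* { dg-add-options arm_neon } */

    #include "arm_neon.h"

    void test_vadds32 (void)
    {
      int32x2_t out_int32x2_t;
      int32x2_t arg0_int32x2_t;
      int32x2_t arg1_int32x2_t;

      out_int32x2_t = vadd_s32 (arg0_int32x2_t, arg1_int32x2_t);
    }

    /* { dg-final { scan-assembler "vadd\.i32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
    /* { dg-final { cleanup-saved-temps } } */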
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) + +(define_insn "*neon_mov" + [(set (match_operand:VDX 0 "nonimmediate_operand" + "=w,Un,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VDX 1 "general_operand" + " w,w, Dn,Uni, w, r, r, Usi,r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%P0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ ", width); + + return templ; + } + + switch (which_alternative) + { + case 0: return "vmov\t%P0, %P1 @ "; + case 1: case 3: return output_move_neon (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %P1 @ "; + case 5: return "vmov\t%P0, %Q1, %R1 @ "; + default: return output_move_double (operands, true, NULL); + } +} + [(set_attr "type" "neon_move,neon_store1_1reg,neon_move,\ + neon_load1_1reg, neon_to_gp,neon_from_gp,mov_reg,\ + neon_load1_2reg, neon_store1_2reg") + (set_attr "length" "4,4,4,4,4,4,8,8,8") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")]) + +(define_insn "*neon_mov" + [(set (match_operand:VQXMOV 0 "nonimmediate_operand" + "=w,Un,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VQXMOV 1 "general_operand" + " w,w, Dn,Uni, w, r, r, Usi, r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%q0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%q0, %%1 @ ", width); + + return templ; + } + + switch (which_alternative) + { + case 0: return "vmov\t%q0, %q1 @ "; + case 1: case 3: return output_move_neon (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %e1 @ \;vmov\t%J0, %K0, %f1"; + case 5: return "vmov\t%e0, %Q1, %R1 @ \;vmov\t%f0, %J1, %K1"; + default: return output_move_quad (operands); + } +} + [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\ + neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\ + mov_reg,neon_load1_4reg,neon_store1_4reg") + (set_attr "length" "4,8,4,8,8,8,16,8,16") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (TImode, operands[1]); + } +}) + +(define_expand "mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + } +}) + +(define_insn "*neon_mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") + (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch 
(which_alternative) + { + case 0: return "#"; + case 1: case 2: return output_move_neon (operands); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q") + (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) + +(define_split + [(set (match_operand:EI 0 "s_register_operand" "") + (match_operand:EI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 4); + src[1] = gen_rtx_REG (DImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:OI 0 "s_register_operand" "") + (match_operand:OI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "s_register_operand" "") + (match_operand:CI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + + neon_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "s_register_operand" "") + (match_operand:XI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + dest[3] = gen_rtx_REG (TImode, rdest + 12); + src[3] = gen_rtx_REG (TImode, rsrc + 12); + + neon_disambiguate_copy (operands, dest, src, 4); +}) + +(define_expand "movmisalign" + [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" +{ + rtx adjust_mem; + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. 
*/ + if (!s_register_operand (operands[0], mode) + && !s_register_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + if (s_register_operand (operands[0], mode)) + adjust_mem = operands[1]; + else + adjust_mem = operands[0]; + + /* Legitimize address. */ + if (!neon_vector_mem_operand (adjust_mem, 2, true)) + XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0)); + +}) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") + (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vst1.\t{%P1}, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" + " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vld1.\t{%P0}, %A1" + [(set_attr "type" "neon_load1_1reg")]) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") + (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vst1.\t{%q1}, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" + " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vld1.\t{%q0}, %A1" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w,w") + (vec_merge:VD + (vec_duplicate:VD + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VD 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + int elt = ffs ((int) INTVAL (operands[2])) - 1; + if (BYTES_BIG_ENDIAN) + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; +} + [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VQ 0 "s_register_operand" "=w,w") + (vec_merge:VQ + (vec_duplicate:VQ + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VQ 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = elem % half_elts; + int hi = (elem / half_elts) * 2; + int regno = REGNO (operands[0]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[0] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; +} + [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")] +) + +(define_insn "vec_setv2di_internal" + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" "Um,r")) + (match_operand:V2DI 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int regno = REGNO (operands[0]) 
+ 2 * elem; + + operands[0] = gen_rtx_REG (DImode, regno); + + if (which_alternative == 0) + return "vld1.64\t%P0, %A1"; + else + return "vmov\t%P0, %Q1, %R1"; +} + [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")] +) + +(define_expand "vec_set" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; +}) + +(define_insn "vec_extract" + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") + (vec_select: + (match_operand:VD 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_store1_one_lane,neon_to_gp")] +) + +(define_insn "vec_extract" + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = INTVAL (operands[2]) % half_elts; + int hi = (INTVAL (operands[2]) / half_elts) * 2; + int regno = REGNO (operands[1]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[1] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_store1_one_lane,neon_to_gp")] +) + +(define_insn "vec_extractv2di" + [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") + (vec_select:DI + (match_operand:V2DI 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); + + operands[1] = gen_rtx_REG (DImode, regno); + + if (which_alternative == 0) + return "vst1.64\t{%P1}, %A0 @ v2di"; + else + return "vmov\t%Q0, %R0, %P1 @ v2di"; +} + [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")] +) + +(define_expand "vec_init" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand 1 "" "")] + "TARGET_NEON" +{ + neon_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; Doubleword and quadword arithmetic. + +;; NOTE: some other instructions also support 64-bit integer +;; element size, which we could potentially use for "long long" operations. + +(define_insn "*add3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vadd.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_add")))] +) + +(define_insn "adddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r") + (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 3: return "vadd.i64\t%P0, %P1, %P2"; + case 1: return "#"; + case 2: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_add,multiple,multiple,neon_add,\ + multiple,multiple,multiple") + (set_attr "conds" "*,clob,clob,*,clob,clob,clob") + (set_attr "length" "*,8,8,*,8,8,8") + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] +) + +(define_insn "*sub3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vsub.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_sub")))] +) + +(define_insn "subdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") + (match_operand:DI 2 "s_register_operand" "w,r,0,0,w"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 4: return "vsub.i64\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 2: /* fall through */ + case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub") + (set_attr "conds" "*,clob,clob,clob,*") + (set_attr "length" "*,8,8,8,*") + (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] +) + +(define_insn "*mul3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmul.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s") + (const_string "neon_mul_")))] +) + +(define_insn "mul3add_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")) + (match_operand:VDQW 1 "s_register_operand" "0")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmla.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "mul3negadd_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") + (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w"))))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vmls.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +;; Fused multiply-accumulate +;; We define each insn twice here: +;; 1: with flag_unsafe_math_optimizations for the widening multiply phase +;; to be able to use when converting to FMA. +;; 2: without flag_unsafe_math_optimizations for the intrinsics to use. +(define_insn "fma4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfma%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "fma4_intrinsic" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA" + "vfma%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "*fmsub4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfms%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "fmsub4_intrinsic" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA" + "vfms%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "neon_vrint" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 + "s_register_operand" "w")] + NEON_VRINT))] + "TARGET_NEON && TARGET_FPU_ARMV8" + "vrint%?.f32\\t%0, %1" + [(set_attr "type" "neon_fp_round_")] +) + +(define_insn "ior3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vorr\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vorr", &operands[2], + mode, 0, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_logic")] +) + +;; The concrete forms of the Neon immediate-logic instructions are vbic and +;; vorr. We support the pseudo-instruction vand instead, because that +;; corresponds to the canonical form the middle-end expects to use for +;; immediate bitwise-ANDs. 
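(For reference, one source-level way to reach the immediate-logic alternative described above; whether the vectorizer actually produces vand or the complemented vbic form depends on the constant, the -mfpu setting and optimization flags, so this is only an illustrative sketch.)

    #include <stdint.h>

    /* Built with something like -O2 -mfpu=neon -ftree-vectorize, the masking
       below is presented to the backend as a vector AND with a per-lane
       constant; neon_output_logic_immediate then prints that either as vand
       or as vbic with the complemented immediate.  */
    void
    clear_high_byte (uint32_t *a, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] &= 0x00ffffff;
    }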
+ +(define_insn "and3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vand\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vand", &operands[2], + mode, 1, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_logic")] +) + +(define_insn "orn3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vorn\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +;; TODO: investigate whether we should disable +;; this and bicdi3_neon for the A8 in line with the other +;; changes above. +(define_insn_and_split "orndi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") + (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r")) + (match_operand:DI 1 "s_register_operand" "w,r,r,0")))] + "TARGET_NEON" + "@ + vorn\t%P0, %P1, %P2 + # + # + #" + "reload_completed && + (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + if (TARGET_THUMB2) + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[5] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + else + { + emit_insn (gen_one_cmpldi2 (operands[0], operands[2])); + emit_insn (gen_iordi3 (operands[0], operands[1], operands[0])); + DONE; + } + }" + [(set_attr "type" "neon_logic,multiple,multiple,multiple") + (set_attr "length" "*,16,8,8") + (set_attr "arch" "any,a,t2,t2")] +) + +(define_insn "bic3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vbic\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +;; Compare to *anddi_notdi_di. 
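(Both the vector pattern above and the DImode pattern that follows cover the same and-with-complement idiom. A minimal sketch, assuming a NEON-enabled compile, of the two source forms involved; which alternative of bicdi3_neon is chosen is a register-allocation decision, so the instructions named in the comments are only the typical outcome.)

    #include <stdint.h>
    #include <arm_neon.h>

    uint32x4_t
    mask_off_q (uint32x4_t a, uint32x4_t b)
    {
      return vbicq_u32 (a, b);      /* a & ~b: a single vbic on a q-register */
    }

    uint64_t
    mask_off_di (uint64_t a, uint64_t b)
    {
      return a & ~b;                /* bicdi3_neon: vbic on a d-register, or a
                                       two-instruction core-register split */
    }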
+(define_insn "bicdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0")) + (match_operand:DI 1 "s_register_operand" "w,0,r")))] + "TARGET_NEON" + "@ + vbic\t%P0, %P1, %P2 + # + #" + [(set_attr "type" "neon_logic,multiple,multiple") + (set_attr "length" "*,8,8")] +) + +(define_insn "xor3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON" + "veor\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vmvn\t%0, %1" + [(set_attr "type" "neon_move")] +) + +(define_insn "abs2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vabs.\t%0, %1" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_abs_s") + (const_string "neon_abs")))] +) + +(define_insn "neg2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vneg.\t%0, %1" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_neg_s") + (const_string "neon_neg")))] +) + +(define_insn "negdi2_neon" + [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r") + (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r"))) + (clobber (match_scratch:DI 2 "= X,&w,X, X")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" + "#" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +; Split negdi2_neon for vfp registers +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (const_int 0)) + (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + { + if (!REG_P (operands[2])) + operands[2] = operands[0]; + } +) + +; Split negdi2_neon for core registers +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed + && arm_general_register_operand (operands[0], DImode)" + [(parallel [(set (match_dup 0) (neg:DI (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*umin3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set_attr "type" "neon_minmax")] +) + +(define_insn "*umax3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set_attr "type" "neon_minmax")] +) + +(define_insn "*smin3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test 
"") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_insn "*smax3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +; TODO: V2DI shifts are current disabled because there are bugs in the +; generic vectorizer code. It ends up creating a V2DI constructor with +; SImode elements. + +(define_insn "vashl3" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") + (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") + (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] + "TARGET_NEON" + { + switch (which_alternative) + { + case 0: return "vshl.\t%0, %1, %2"; + case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], + mode, + VALID_NEON_QREG_MODE (mode), + true); + default: gcc_unreachable (); + } + } + [(set_attr "type" "neon_shift_reg, neon_shift_imm")] +) + +(define_insn "vashr3_imm" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] + "TARGET_NEON" + { + return neon_output_shift_immediate ("vshr", 's', &operands[2], + mode, VALID_NEON_QREG_MODE (mode), + false); + } + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "vlshr3_imm" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] + "TARGET_NEON" + { + return neon_output_shift_immediate ("vshr", 'u', &operands[2], + mode, VALID_NEON_QREG_MODE (mode), + false); + } + [(set_attr "type" "neon_shift_imm")] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with signed operands. This is essentially the same as ashl3 +; above, but using an unspec in case GCC tries anything tricky with negative +; shift amounts. + +(define_insn "ashl3_signed" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set_attr "type" "neon_shift_reg")] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with unsigned operands. 
+ +(define_insn "ashl3_unsigned" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set_attr "type" "neon_shift_reg")] +) + +(define_expand "vashr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" +{ + if (s_register_operand (operands[2], mode)) + { + rtx neg = gen_reg_rtx (mode); + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_signed (operands[0], operands[1], neg)); + } + else + emit_insn (gen_vashr3_imm (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vlshr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" +{ + if (s_register_operand (operands[2], mode)) + { + rtx neg = gen_reg_rtx (mode); + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg)); + } + else + emit_insn (gen_vlshr3_imm (operands[0], operands[1], operands[2])); + DONE; +}) + +;; 64-bit shifts + +;; This pattern loads a 32-bit shift count into a 64-bit NEON register, +;; leaving the upper half uninitalized. This is OK since the shift +;; instruction only looks at the low 8 bits anyway. To avoid confusing +;; data flow analysis however, we pretend the full register is set +;; using an unspec. +(define_insn "neon_load_count" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] + UNSPEC_LOAD_COUNT))] + "TARGET_NEON" + "@ + vld1.32\t{%P0[0]}, %A1 + vmov.32\t%P0[0], %1" + [(set_attr "type" "neon_load1_1reg,neon_from_gp")] +) + +(define_insn "ashldi3_neon_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w") + (match_operand:DI 2 "reg_or_int_operand" " i,w")))] + "TARGET_NEON && reload_completed + && (!CONST_INT_P (operands[2]) + || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))" + "@ + vshl.u64\t%P0, %P1, %2 + vshl.u64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_imm, neon_shift_reg")] +) + +(define_insn_and_split "ashldi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + (clobber (reg:CC_C CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 63) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 63); + } + else + { + emit_insn (gen_neon_load_count (operands[5], operands[2])); + operands[2] = operands[5]; + } + + /* Ditch the unnecessary clobbers. 
*/ + emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1], + operands[2])); + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. */ + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + else + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + DONE; + }" + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,*,*") + (set_attr "type" "multiple")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "signed_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON && reload_completed" + "vshl.s64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_reg")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "unsigned_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON && reload_completed" + "vshl.u64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "ashrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.s64\t%P0, %P1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "lshrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.u64\t%P0, %P1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +;; ashrdi3_neon +;; lshrdi3_neon +(define_insn_and_split "di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") + (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 64) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 64); + + /* Ditch the unnecessary clobbers. */ + emit_insn (gen_di3_neon_imm_noclobber (operands[0], + operands[1], + operands[2])); + } + else + { + /* We must use a negative left-shift. */ + emit_insn (gen_negsi2 (operands[3], operands[2])); + emit_insn (gen_neon_load_count (operands[5], operands[3])); + emit_insn (gen__shift_di3_neon (operands[0], operands[1], + operands[5])); + } + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. 
*/ + emit_insn (gen_arm_di3_1bit (operands[0], operands[1])); + else + /* This clobbers CC (ASHIFTRT by register only). */ + arm_emit_coreregs_64bit_shift (, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + + DONE; + }" + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,*,*") + (set_attr "type" "multiple")] +) + +;; Widening operations + +(define_insn "widen_ssum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (sign_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "widen_usum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (zero_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "type" "neon_add_widen")] +) + +;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit +;; shift-count granularity. That's good enough for the middle-end's current +;; needs. + +;; Note that it's not safe to perform such an operation in big-endian mode, +;; due to element-ordering issues. + +(define_expand "vec_shr_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == width) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], operands[1], zero_reg, + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +(define_expand "vec_shl_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == 0) + { + emit_move_insn (operands[0], CONST0_RTX (mode)); + DONE; + } + + num_bits = width - num_bits; + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], zero_reg, operands[1], + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +;; Helpers for quad-word reduction operations + +; Add (or smin, smax...) the low N/2 elements of the N-element vector +; operand[1] to the high N/2 elements of same. Put the result in operand[0], an +; N/2-element vector. 
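(The quadword reductions that follow use a fold-in-half strategy: one quad_halves step narrows the q-register operand to a d-register, and neon_pairwise_reduce finishes the job with vpadd, vpmin or vpmax. An intrinsics-level sketch of the same idea for a V4SI sum; the .md patterns below do this directly on hard registers rather than through these intrinsics:)

    #include <arm_neon.h>

    int32_t
    sum_v4si (int32x4_t x)
    {
      /* The quad_halves step: add the high doubleword half onto the low.  */
      int32x2_t folded = vadd_s32 (vget_low_s32 (x), vget_high_s32 (x));
      /* The doubleword reduction: one pairwise vpadd finishes the sum.  */
      int32x2_t summed = vpadd_s32 (folded, folded);
      return vget_lane_s32 (summed, 0);
    }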
+ +(define_insn "quad_halves_v4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vqh_ops:V2SI + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SI (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON" + ".32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_insn "quad_halves_v4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vqhs_ops:V2SF + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SF (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON && flag_unsafe_math_optimizations" + ".f32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_fp_reduc__s_q")] +) + +(define_insn "quad_halves_v8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "+w") + (vqh_ops:V4HI + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_NEON" + ".16\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_insn "quad_halves_v16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "+w") + (vqh_ops:V8QI + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_NEON" + ".8\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_expand "move_hi_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + emit_move_insn (simplify_gen_subreg (mode, operands[0], mode, + GET_MODE_SIZE (mode)), + operands[1]); + DONE; +}) + +(define_expand "move_lo_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + emit_move_insn (simplify_gen_subreg (mode, operands[0], + mode, 0), + operands[1]); + DONE; +}) + +;; Reduction operations + +(define_expand "reduc_splus_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpadd_internal); + DONE; +}) + +(define_expand "reduc_splus_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_plus (step1, operands[1])); + emit_insn (gen_reduc_splus_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "reduc_splus_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vadd.i64\t%e0, %e1, %f1" + [(set_attr "type" "neon_add_q")] +) + +;; NEON does not distinguish between signed and unsigned addition except on +;; widening operations. +(define_expand "reduc_uplus_" + [(match_operand:VDQI 0 "s_register_operand" "") + (match_operand:VDQI 1 "s_register_operand" "")] + "TARGET_NEON && ( || !BYTES_BIG_ENDIAN)" +{ + emit_insn (gen_reduc_splus_ (operands[0], operands[1])); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmin); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smin (step1, operands[1])); + emit_insn (gen_reduc_smin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmax); + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smax (step1, operands[1])); + emit_insn (gen_reduc_smax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumin); + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umin (step1, operands[1])); + emit_insn (gen_reduc_umin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumax); + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umax (step1, operands[1])); + emit_insn (gen_reduc_umax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "neon_vpadd_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON" + "vpadd.\t%P0, %P1, %P2" + ;; Assume this schedules like vadd. 
+ [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_add_s") + (const_string "neon_reduc_add")))] +) + +(define_insn "neon_vpsmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpsmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpumin" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "neon_vpumax" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_minmax")] +) + +;; Saturating arithmetic + +; NOTE: Neon supports many more saturating variants of instructions than the +; following, but these are all GCC currently understands. +; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself +; yet either, although these patterns may be used by intrinsics when they're +; added. + +(define_insn "*ss_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "*us_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "*ss_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qsub")] +) + +(define_insn "*us_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qsub")] +) + +;; Conditional instructions. These are comparisons with conditional moves for +;; vectors. They perform the assignment: +;; +;; Vop0 = (Vop4 Vop5) ? Vop1 : Vop2; +;; +;; where op3 is <, <=, ==, !=, >= or >. Operations are performed +;; element-wise. 
+ +(define_expand "vcond" + [(set (match_operand:VDQW 0 "s_register_operand" "") + (if_then_else:VDQW + (match_operator 3 "comparison_operator" + [(match_operand:VDQW 4 "s_register_operand" "") + (match_operand:VDQW 5 "nonmemory_operand" "")]) + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + HOST_WIDE_INT magic_word = (mode == V2SFmode || mode == V4SFmode) + ? 3 : 1; + rtx magic_rtx = GEN_INT (magic_word); + int inverse = 0; + int use_zero_form = 0; + int swap_bsl_operands = 0; + rtx mask = gen_reg_rtx (mode); + rtx tmp = gen_reg_rtx (mode); + + rtx (*base_comparison) (rtx, rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx); + + switch (GET_CODE (operands[3])) + { + case GE: + case GT: + case LE: + case LT: + case EQ: + if (operands[5] == CONST0_RTX (mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_neon_vcge; + complimentary_comparison = gen_neon_vcgt; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ + case GT: + case UNGT: + base_comparison = gen_neon_vcgt; + complimentary_comparison = gen_neon_vcge; + break; + case EQ: + case NE: + case UNEQ: + base_comparison = gen_neon_vceq; + complimentary_comparison = gen_neon_vceq; + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case LE: + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of vcge, vcgt or vceq. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_neon_vclt; + break; + case LE: + base_comparison = gen_neon_vcle; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); + break; + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* Vector compare returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. + + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); + + swap_bsl_operands = 1; + break; + case UNEQ: + /* We check (a > b || b > a). combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. 
*/ + + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcgt (tmp, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_neon_vcgt (tmp, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior3 (mask, mask, tmp)); + break; + default: + gcc_unreachable (); + } + + if (swap_bsl_operands) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (if_then_else:VDQIW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQIW 4 "s_register_operand" "") + (match_operand:VDQIW 5 "s_register_operand" "")]) + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GEU: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + const0_rtx)); + break; + + case GTU: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + const0_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + break; + + case LEU: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + const0_rtx)); + break; + + case LTU: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + const0_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + + DONE; +}) + +;; Patterns for builtins. + +; good for plain vadd, vaddq. + +(define_expand "neon_vadd" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_add3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vadd_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Note that NEON operations don't support the full IEEE 754 standard: in +; particular, denormal values are flushed to zero. This means that GCC cannot +; use those instructions for autovectorization, etc. unless +; -funsafe-math-optimizations is in effect (in which case flush-to-zero +; behaviour is permissible). 
Intrinsic operations (provided by the arm_neon.h +; header) must work in either case: if -funsafe-math-optimizations is given, +; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics +; expand to unspecs (which may potentially limit the extent to which they might +; be optimized by generic code). + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vadd_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VADD))] + "TARGET_NEON" + "vadd.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_add")))] +) + +; operand 3 represents in bits: +; bit 0: signed (vs unsigned). +; bit 1: rounding (vs none). + +(define_insn "neon_vaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDL))] + "TARGET_NEON" + "vaddl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_add_long")] +) + +(define_insn "neon_vaddw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDW))] + "TARGET_NEON" + "vaddw.%T3%#\t%q0, %q1, %P2" + [(set_attr "type" "neon_add_widen")] +) + +; vhadd and vrhadd. + +(define_insn "neon_vhadd" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHADD))] + "TARGET_NEON" + "v%O3hadd.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_add_halve_q")] +) + +(define_insn "neon_vqadd" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQADD))] + "TARGET_NEON" + "vqadd.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "neon_vaddhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDHN))] + "TARGET_NEON" + "v%O3addhn.\t%P0, %q1, %q2" + [(set_attr "type" "neon_add_halve_narrow_q")] +) + +;; We cannot replace this unspec with mul3 because of the odd +;; polynomial multiplication case that can specified by operand 3. +(define_insn "neon_vmul" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMUL))] + "TARGET_NEON" + "vmul.%F3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s") + (const_string "neon_mul_")))] +) + +(define_expand "neon_vmla" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! 
|| flag_unsafe_math_optimizations) + emit_insn (gen_mul3add_neon (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_neon_vmla_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "neon_vfma" + [(match_operand:VCVTF 0 "s_register_operand") + (match_operand:VCVTF 1 "s_register_operand") + (match_operand:VCVTF 2 "s_register_operand") + (match_operand:VCVTF 3 "s_register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_NEON && TARGET_FMA" +{ + emit_insn (gen_fma4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + +(define_expand "neon_vfms" + [(match_operand:VCVTF 0 "s_register_operand") + (match_operand:VCVTF 1 "s_register_operand") + (match_operand:VCVTF 2 "s_register_operand") + (match_operand:VCVTF 3 "s_register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_NEON && TARGET_FMA" +{ + emit_insn (gen_fmsub4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vmla_unspec" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")] + UNSPEC_VMLA))] + "TARGET_NEON" + "vmla.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "neon_vmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLAL))] + "TARGET_NEON" + "vmlal.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_mla__long")] +) + +(define_expand "neon_vmls" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_mul3negadd_neon (operands[0], + operands[1], operands[2], operands[3])); + else + emit_insn (gen_neon_vmls_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
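As an illustrative aside (not part of the patch): the multiply-accumulate expanders above back intrinsics of the vmla/vfma family; whether they expand to canonical mul-plus-add RTL or to the unspec form is an internal decision driven by -funsafe-math-optimizations, invisible at the source level. A minimal usage sketch (the fused variant additionally needs an FMA-capable FPU, e.g. -mfpu=neon-vfpv4):

#include <arm_neon.h>

/* Illustrative only: acc + a * b across all four float lanes.
   vmlaq_f32 is the kind of operation the vmla patterns back;
   vfmaq_f32 is the fused counterpart handled by the vfma pattern.  */
float32x4_t madd (float32x4_t acc, float32x4_t a, float32x4_t b)
{
  return vmlaq_f32 (acc, a, b);
}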
+ +(define_insn "neon_vmls_unspec" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")] + UNSPEC_VMLS))] + "TARGET_NEON" + "vmls.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "neon_vmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLSL))] + "TARGET_NEON" + "vmlsl.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "neon_vqdmulh" + [(set (match_operand:VMDQI 0 "s_register_operand" "=w") + (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") + (match_operand:VMDQI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULH))] + "TARGET_NEON" + "vq%O3dmulh.\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul_")] +) + +(define_insn "neon_vqdmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLAL))] + "TARGET_NEON" + "vqdmlal.\t%q0, %P2, %P3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "neon_vqdmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLSL))] + "TARGET_NEON" + "vqdmlsl.\t%q0, %P2, %P3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "neon_vmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMULL))] + "TARGET_NEON" + "vmull.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_mul__long")] +) + +(define_insn "neon_vqdmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULL))] + "TARGET_NEON" + "vqdmull.\t%q0, %P1, %P2" + [(set_attr "type" "neon_sat_mul__long")] +) + +(define_expand "neon_vsub" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_sub3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vsub_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ +(define_insn "neon_vsub_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VSUB))] + "TARGET_NEON" + "vsub.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_sub")))] +) + +(define_insn "neon_vsubl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBL))] + "TARGET_NEON" + "vsubl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_sub_long")] +) + +(define_insn "neon_vsubw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBW))] + "TARGET_NEON" + "vsubw.%T3%#\t%q0, %q1, %P2" + [(set_attr "type" "neon_sub_widen")] +) + +(define_insn "neon_vqsub" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSUB))] + "TARGET_NEON" + "vqsub.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_qsub")] +) + +(define_insn "neon_vhsub" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHSUB))] + "TARGET_NEON" + "vhsub.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_sub_halve")] +) + +(define_insn "neon_vsubhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBHN))] + "TARGET_NEON" + "v%O3subhn.\t%P0, %q1, %q2" + [(set_attr "type" "neon_sub_halve_narrow_q")] +) + +(define_insn "neon_vceq" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCEQ))] + "TARGET_NEON" + "@ + vceq.\t%0, %1, %2 + vceq.\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcge" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGE))] + "TARGET_NEON" + "@ + vcge.%T3%#\t%0, %1, %2 + vcge.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcgeu" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGEU))] + "TARGET_NEON" + "vcge.%T3%#\t%0, %1, %2" + [(set_attr 
"type" "neon_compare")] +) + +(define_insn "neon_vcgt" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGT))] + "TARGET_NEON" + "@ + vcgt.%T3%#\t%0, %1, %2 + vcgt.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcgtu" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGTU))] + "TARGET_NEON" + "vcgt.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_compare")] +) + +;; VCLE and VCLT only support comparisons with immediate zero (register +;; variants are VCGE and VCGT with operands reversed). + +(define_insn "neon_vcle" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "zero_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLE))] + "TARGET_NEON" + "vcle.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vclt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "zero_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLT))] + "TARGET_NEON" + "vclt.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcage" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGE))] + "TARGET_NEON" + "vacge.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_insn "neon_vcagt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGT))] + "TARGET_NEON" + "vacgt.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_insn "neon_vtst" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VTST))] + "TARGET_NEON" + "vtst.\t%0, %1, %2" + [(set_attr "type" "neon_tst")] +) + +(define_insn "neon_vabd" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABD))] + "TARGET_NEON" + "vabd.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + 
+(define_insn "neon_vabdl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABDL))] + "TARGET_NEON" + "vabdl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_abd_long")] +) + +(define_insn "neon_vaba" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:VDQIW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABD) + (match_operand:VDQIW 1 "s_register_operand" "0")))] + "TARGET_NEON" + "vaba.%T4%#\t%0, %2, %3" + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "neon_vabal" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (unspec: [(match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABDL) + (match_operand: 1 "s_register_operand" "0")))] + "TARGET_NEON" + "vabal.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "neon_vmax" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMAX))] + "TARGET_NEON" + "vmax.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_insn "neon_vmin" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMIN))] + "TARGET_NEON" + "vmin.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_expand "neon_vpadd" + [(match_operand:VD 0 "s_register_operand" "=w") + (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_neon_vpadd_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_insn "neon_vpaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VPADDL))] + "TARGET_NEON" + "vpaddl.%T2%#\t%0, %1" + [(set_attr "type" "neon_reduc_add_long")] +) + +(define_insn "neon_vpadal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPADAL))] + "TARGET_NEON" + "vpadal.%T3%#\t%0, %2" + [(set_attr "type" "neon_reduc_add_acc")] +) + +(define_insn "neon_vpmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMAX))] + "TARGET_NEON" + "vpmax.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD 
[(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMIN))] + "TARGET_NEON" + "vpmin.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vrecps" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRECPS))] + "TARGET_NEON" + "vrecps.\t%0, %1, %2" + [(set_attr "type" "neon_fp_recps_s")] +) + +(define_insn "neon_vrsqrts" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRSQRTS))] + "TARGET_NEON" + "vrsqrts.\t%0, %1, %2" + [(set_attr "type" "neon_fp_rsqrts_s")] +) + +(define_expand "neon_vabs" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_abs2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqabs" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQABS))] + "TARGET_NEON" + "vqabs.\t%0, %1" + [(set_attr "type" "neon_qabs")] +) + +(define_expand "neon_vneg" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_neg2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqneg" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQNEG))] + "TARGET_NEON" + "vqneg.\t%0, %1" + [(set_attr "type" "neon_qneg")] +) + +(define_insn "neon_vcls" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCLS))] + "TARGET_NEON" + "vcls.\t%0, %1" + [(set_attr "type" "neon_cls")] +) + +(define_insn "clz2" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vclz.\t%0, %1" + [(set_attr "type" "neon_cnt")] +) + +(define_expand "neon_vclz" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_clz2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "popcount2" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcnt.\t%0, %1" + [(set_attr "type" "neon_cnt")] +) + +(define_expand "neon_vcnt" + [(match_operand:VE 0 "s_register_operand" "=w") + (match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_popcount2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vrecpe" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + 
(match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRECPE))] + "TARGET_NEON" + "vrecpe.\t%0, %1" + [(set_attr "type" "neon_fp_recpe_s")] +) + +(define_insn "neon_vrsqrte" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRSQRTE))] + "TARGET_NEON" + "vrsqrte.\t%0, %1" + [(set_attr "type" "neon_fp_rsqrte_s")] +) + +(define_expand "neon_vmvn" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_one_cmpl2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov.s\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov.u\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov.s\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "type" "neon_to_gp_q")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov.u\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "type" "neon_to_gp_q")] +) + +(define_expand "neon_vget_lane" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT magic = INTVAL (operands[3]); + rtx insn; + + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + /* The 
intrinsics are defined in terms of a model where the + element ordering in memory is vldm order, whereas the generic + RTL is defined in terms of a model where the element ordering + in memory is array order. Convert the lane number to conform + to this model. */ + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (mode)) == 32) + insn = gen_vec_extract (operands[0], operands[1], operands[2]); + else + { + if ((magic & 1) != 0) + insn = gen_neon_vget_lane_sext_internal (operands[0], operands[1], + operands[2]); + else + insn = gen_neon_vget_lane_zext_internal (operands[0], operands[1], + operands[2]); + } + emit_insn (insn); + DONE; +}) + +; Operand 3 (info word) is ignored because it does nothing useful with 64-bit +; elements. + +(define_expand "neon_vget_lanedi" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vget_lanev2di" + [(match_operand:DI 0 "s_register_operand" "") + (match_operand:V2DI 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_move_insn (operands[0], gen_lowpart (DImode, operands[1])); + break; + case 1: + emit_move_insn (operands[0], gen_highpart (DImode, operands[1])); + break; + default: + neon_lane_bounds (operands[2], 0, 1); + FAIL; + } + DONE; +}) + +(define_expand "neon_vset_lane" + [(match_operand:VDQ 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "r") + (match_operand:VDQ 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + unsigned int elt = INTVAL (operands[3]); + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + } + + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT (1 << elt), operands[2])); + DONE; +}) + +; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. 
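As an illustrative aside (not part of the patch): the vget_lane/vset_lane expanders above perform the big-endian lane renumbering themselves, so source code using the intrinsics does not need to adjust lane indices for endianness. A hedged sketch of their use; lane numbers must be compile-time constants:

#include <arm_neon.h>

/* Illustrative only: read lane 2 of a quad register and write lane 0
   of another.  */
int16_t read_lane2 (int16x8_t v)
{
  return vgetq_lane_s16 (v, 2);
}

float32x4_t set_lane0 (float32x4_t v, float x)
{
  return vsetq_lane_f32 (x, v, 0);
}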
+ +(define_expand "neon_vset_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vcreate" + [(match_operand:VDX 0 "s_register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_NEON" +{ + rtx src = gen_lowpart (mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +(define_insn "neon_vdup_n" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (vec_duplicate:VX (match_operand: 1 "s_register_operand" "r")))] + "TARGET_NEON" + "vdup.\t%0, %1" + [(set_attr "type" "neon_from_gp")] +) + +(define_insn "neon_vdup_n" + [(set (match_operand:V32 0 "s_register_operand" "=w,w") + (vec_duplicate:V32 (match_operand: 1 "s_register_operand" "r,t")))] + "TARGET_NEON" + "@ + vdup.\t%0, %1 + vdup.\t%0, %y1" + [(set_attr "type" "neon_from_gp,neon_dup")] +) + +(define_expand "neon_vdup_ndi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], operands[1]); + DONE; +} +) + +(define_insn "neon_vdup_nv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] + "TARGET_NEON" + "@ + vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 + vmov\t%e0, %P1\;vmov\t%f0, %P1" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "neon_vdup_lane_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (vec_duplicate:VDQW + (vec_select: + (match_operand: 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + if () + return "vdup.\t%P0, %P1[%c2]"; + else + return "vdup.\t%q0, %P1[%c2]"; +} + [(set_attr "type" "neon_dup")] +) + +(define_expand "neon_vdup_lane" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + emit_insn (gen_neon_vdup_lane_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Scalar index is ignored, since only zero is valid here. +(define_expand "neon_vdup_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +; Likewise for v2di, as the DImode second operand has only a single element. 
+(define_expand "neon_vdup_lanev2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; +}) + +; Disabled before reload because we don't want combine doing something silly, +; but used by the post-reload expansion of neon_vcombine. +(define_insn "*neon_vswp" + [(set (match_operand:VDQX 0 "s_register_operand" "+w") + (match_operand:VDQX 1 "s_register_operand" "+w")) + (set (match_dup 1) (match_dup 0))] + "TARGET_NEON && reload_completed" + "vswp\t%0, %1" + [(set_attr "type" "neon_permute")] +) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. +;; FIXME: A different implementation of this builtin could make it much +;; more likely that we wouldn't actually need to output anything (we could make +;; it so that the reg allocator puts things in the right places magically +;; instead). Lack of subregs for vectors makes that tricky though, I think. + +(define_insn_and_split "neon_vcombine" + [(set (match_operand: 0 "s_register_operand" "=w") + (vec_concat: + (match_operand:VDX 1 "s_register_operand" "w") + (match_operand:VDX 2 "s_register_operand" "w")))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + neon_split_vcombine (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_expand "neon_vget_high" + [(match_operand: 0 "s_register_operand") + (match_operand:VQX 1 "s_register_operand")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], + simplify_gen_subreg (mode, operands[1], mode, + GET_MODE_SIZE (mode))); + DONE; +}) + +(define_expand "neon_vget_low" + [(match_operand: 0 "s_register_operand") + (match_operand:VQX 1 "s_register_operand")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], + simplify_gen_subreg (mode, operands[1], + mode, 0)); + DONE; +}) + +(define_insn "float2" + [(set (match_operand: 0 "s_register_operand" "=w") + (float: (match_operand:VCVTI 1 "s_register_operand" "w")))] + "TARGET_NEON && !flag_rounding_math" + "vcvt.f32.s32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "floatuns2" + [(set (match_operand: 0 "s_register_operand" "=w") + (unsigned_float: (match_operand:VCVTI 1 "s_register_operand" "w")))] + "TARGET_NEON && !flag_rounding_math" + "vcvt.f32.u32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "fix_trunc2" + [(set (match_operand: 0 "s_register_operand" "=w") + (fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.s32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "fixuns_trunc2" + [(set (match_operand: 0 "s_register_operand" "=w") + (unsigned_fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.u32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.%T2%#32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.f32.%T2%#32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + 
+(define_insn "neon_vcvtv4sfv4hf" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f32.f16\t%q0, %P1" + [(set_attr "type" "neon_fp_cvt_widen_h")] +) + +(define_insn "neon_vcvtv4hfv4sf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f16.f32\t%P0, %q1" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.%T3%#32.f32\t%0, %1, %2"; +} + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.f32.%T3%#32\t%0, %1, %2"; +} + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "neon_vmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVN))] + "TARGET_NEON" + "vmovn.\t%P0, %q1" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "neon_vqmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQMOVN))] + "TARGET_NEON" + "vqmovn.%T2%#\t%P0, %q1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vqmovun" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQMOVUN))] + "TARGET_NEON" + "vqmovun.\t%P0, %q1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vmovl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVL))] + "TARGET_NEON" + "vmovl.%T2%#\t%q0, %P1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w") + (match_operand:VMD 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%P0, %P1, %P2[%c3]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s_scalar") + (const_string "neon_mul__scalar")))] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return 
"vmul.\t%q0, %q1, %P2[%c3]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s_scalar") + (const_string "neon_mul__scalar")))] +) + +(define_insn "neon_vmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmull.%T4%#\t%q0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_insn "neon_vqdmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmull.\t%q0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMQI 0 "s_register_operand" "=w") + (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_q")] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMDI 0 "s_register_operand" "=w") + (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_q")] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 
"s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlal.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "neon_vqdmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlal.\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "neon_vqdmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a +; core register into a temp register, then use a scalar taken from that. This +; isn't an optimal solution if e.g. 
the scalar has just been read from memory +; or extracted from another vector. The latter case it's currently better to +; use the "_lane" variant, and the former case can probably be implemented +; using vld1_lane, but that hasn't been done yet. + +(define_expand "neon_vmul_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmul_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmull_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmull_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMDI 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMQI 0 "s_register_operand" "") + (match_operand:VMQI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMQ 0 "s_register_operand" "") + 
(match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_insn "neon_vext" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") 
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VEXT))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vext.\t%0, %1, %2, %3"; +} + [(set_attr "type" "neon_ext")] +) + +(define_insn "neon_vrev64" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV64))] + "TARGET_NEON" + "vrev64.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +(define_insn "neon_vrev32" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (unspec:VX [(match_operand:VX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV32))] + "TARGET_NEON" + "vrev32.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +(define_insn "neon_vrev16" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (unspec:VE [(match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV16))] + "TARGET_NEON" + "vrev16.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register +; allocation. For an intrinsic of form: +; rD = vbsl_* (rS, rN, rM) +; We can use any of: +; vbsl rS, rN, rM (if D = S) +; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) +; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) + +(define_insn "neon_vbsl_internal" + [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") + (match_operand:VDQX 2 "s_register_operand" " w,w,0") + (match_operand:VDQX 3 "s_register_operand" " w,0,w")] + UNSPEC_VBSL))] + "TARGET_NEON" + "@ + vbsl\t%0, %2, %3 + vbit\t%0, %2, %1 + vbif\t%0, %3, %1" + [(set_attr "type" "neon_bsl")] +) + +(define_expand "neon_vbsl" + [(set (match_operand:VDQX 0 "s_register_operand" "") + (unspec:VDQX [(match_operand: 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:VDQX 3 "s_register_operand" "")] + UNSPEC_VBSL))] + "TARGET_NEON" +{ + /* We can't alias operands together if they have different modes. 
*/ + operands[1] = gen_lowpart (mode, operands[1]); +}) + +(define_insn "neon_vshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL))] + "TARGET_NEON" + "v%O3shl.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vqshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL))] + "TARGET_NEON" + "vq%O3shl.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vshr_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHR_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) + 1); + return "v%O3shr.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "v%O3shrn.\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "neon_vqshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrn.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vqshrun_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRUN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrun.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vshl.\t%0, %1, %2"; +} + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vqshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vqshl.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vqshlu_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 
"s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHLU_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vqshlu.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vshll_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHLL_N))] + "TARGET_NEON" +{ + /* The boundaries are: 0 < imm <= size. */ + neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1); + return "vshll.%T3%#\t%q0, %P1, %2"; +} + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vsra_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VSRA_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "v%O4sra.%T4%#\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_acc")] +) + +(define_insn "neon_vsri_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSRI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "vsri.\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "neon_vsli_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSLI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, neon_element_bits (mode)); + return "vsli.\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "neon_vtbl1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "vtbl.8\t%P0, {%P1}, %P2" + [(set_attr "type" "neon_tbl1")] +) + +(define_insn "neon_vtbl2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "type" "neon_tbl2")] +) + +(define_insn "neon_vtbl3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + 
[(set_attr "type" "neon_tbl3")] +) + +(define_insn "neon_vtbl4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "type" "neon_tbl4")] +) + +;; These three are used by the vec_perm infrastructure for V16QImode. +(define_insn_and_split "neon_vtbl1v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = gen_lowpart (TImode, operands[1]); + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +} + [(set_attr "type" "multiple")] +) + +(define_insn_and_split "neon_vtbl2v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +} + [(set_attr "type" "multiple")] +) + +;; ??? Logically we should extend the regular neon_vcombine pattern to +;; handle quad-word input modes, producing octa-word output modes. But +;; that requires us to add support for octa-word vector modes in moves. +;; That seems overkill for this one use in vec_perm. 
+(define_insn_and_split "neon_vcombinev16qi" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VCONCAT))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + neon_split_vcombine (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_insn "neon_vtbx1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:V8QI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" + "vtbx.8\t%P0, {%P2}, %P3" + [(set_attr "type" "neon_tbl1")] +) + +(define_insn "neon_vtbx2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:TI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "type" "neon_tbl2")] +) + +(define_insn "neon_vtbx3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:EI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + [(set_attr "type" "neon_tbl3")] +) + +(define_insn "neon_vtbx4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:OI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "type" "neon_tbl4")] +) + +(define_expand "neon_vtrn_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. 
+(define_insn "*neon_vtrn_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VTRN2))] + "TARGET_NEON" + "vtrn.\t%0, %2" + [(set_attr "type" "neon_permute")] +) + +(define_expand "neon_vtrn" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vtrn_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vzip_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. +(define_insn "*neon_vzip_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VZIP2))] + "TARGET_NEON" + "vzip.\t%0, %2" + [(set_attr "type" "neon_zip")] +) + +(define_expand "neon_vzip" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vzip_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vuzp_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. 
+(define_insn "*neon_vuzp_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VUZP2))] + "TARGET_NEON" + "vuzp.\t%0, %2" + [(set_attr "type" "neon_zip")] +) + +(define_expand "neon_vuzp" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vuzp_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vreinterpretv8qi" + [(match_operand:V8QI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4hi" + [(match_operand:V4HI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2si" + [(match_operand:V2SI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2sf" + [(match_operand:V2SF 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretdi" + [(match_operand:DI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretti" + [(match_operand:TI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + + +(define_expand "neon_vreinterpretv16qi" + [(match_operand:V16QI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv8hi" + [(match_operand:V8HI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4si" + [(match_operand:V4SI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4sf" + [(match_operand:V4SF 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2di" + [(match_operand:V2DI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_load_lanes" + [(set (match_operand:VDQX 0 "s_register_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] + UNSPEC_VLD1))] + "TARGET_NEON") + +(define_insn "neon_vld1" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1))] + "TARGET_NEON" + "vld1.\t%h0, %A1" + [(set_attr "type" 
"neon_load1_1reg")] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:VDX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vld1.\t%P0, %A1"; + else + return "vld1.\t{%P0[%c3]}, %A1"; +} + [(set_attr "type" "neon_load1_one_lane")] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:VQX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[3] = GEN_INT (lane); + } + operands[0] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vld1.\t%P0, %A1"; + else + return "vld1.\t{%P0[%c3]}, %A1"; +} + [(set_attr "type" "neon_load1_one_lane")] +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_duplicate:VD (match_operand: 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" + "vld1.\t{%P0[]}, %A1" + [(set_attr "type" "neon_load1_all_lanes")] +) + +;; Special case for DImode. Treat it exactly like a simple load. +(define_expand "neon_vld1_dupdi" + [(set (match_operand:DI 0 "s_register_operand" "") + (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] + UNSPEC_VLD1))] + "TARGET_NEON" + "" +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VQ 0 "s_register_operand" "=w") + (vec_duplicate:VQ (match_operand: 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" +{ + return "vld1.\t{%e0[], %f0[]}, %A1"; +} + [(set_attr "type" "neon_load1_all_lanes")] +) + +(define_insn_and_split "neon_vld1_dupv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx tmprtx = gen_lowpart (DImode, operands[0]); + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); + DONE; + } + [(set_attr "length" "8") + (set_attr "type" "neon_load1_all_lanes_q")] +) + +(define_expand "vec_store_lanes" + [(set (match_operand:VDQX 0 "neon_struct_operand") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] + UNSPEC_VST1))] + "TARGET_NEON") + +(define_insn "neon_vst1" + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.\t%h1, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "neon_vst1_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:VDX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vst1.\t{%P1}, %A0"; + else + return "vst1.\t{%P1[%c2]}, %A0"; +} + [(set_attr 
"type" "neon_store1_one_lane")] +) + +(define_insn "neon_vst1_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:VQX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[2] = GEN_INT (lane); + } + operands[1] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vst1.\t{%P1}, %A0"; + else + return "vst1.\t{%P1[%c2]}, %A0"; +} + [(set_attr "type" "neon_store1_one_lane")] +) + +(define_expand "vec_load_lanesti" + [(set (match_operand:TI 0 "s_register_operand") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON") + +(define_insn "neon_vld2" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld2.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_2reg") + (const_string "neon_load2_2reg")))] +) + +(define_expand "vec_load_lanesoi" + [(set (match_operand:OI 0 "s_register_operand") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON") + +(define_insn "neon_vld2" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" + "vld2.\t%h0, %A1" + [(set_attr "type" "neon_load2_2reg_q")]) + +(define_insn "neon_vld2_lane" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:TI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = operands[1]; + ops[3] = operands[3]; + output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; +} + [(set_attr "type" "neon_load2_one_lane")] +) + +(define_insn "neon_vld2_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = operands[1]; + ops[3] = GEN_INT (lane); + output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, 
%A2", ops); + return ""; +} + [(set_attr "type" "neon_load2_one_lane")] +) + +(define_insn "neon_vld2_dup" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + return "vld2.\t{%e0[], %f0[]}, %A1"; + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load2_all_lanes") + (const_string "neon_load1_1reg")))] +) + +(define_expand "vec_store_lanesti" + [(set (match_operand:TI 0 "neon_struct_operand") + (unspec:TI [(match_operand:TI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON") + +(define_insn "neon_vst2" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst2.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_2reg") + (const_string "neon_store2_one_lane")))] +) + +(define_expand "vec_store_lanesoi" + [(set (match_operand:OI 0 "neon_struct_operand") + (unspec:OI [(match_operand:OI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON") + +(define_insn "neon_vst2" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" + "vst2.\t%h1, %A0" + [(set_attr "type" "neon_store2_4reg")] +) + +(define_insn "neon_vst2_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = operands[2]; + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store2_one_lane")] +) + +(define_insn "neon_vst2_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = GEN_INT (lane); + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store2_one_lane")] +) + +(define_expand "vec_load_lanesei" + [(set (match_operand:EI 0 "s_register_operand") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand") + (unspec:VDX [(const_int 
0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON") + +(define_insn "neon_vld3" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld3.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_3reg") + (const_string "neon_load3_3reg")))] +) + +(define_expand "vec_load_lanesci" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld3 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vld3" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[1], EImode, 0); + emit_insn (gen_neon_vld3qa (operands[0], mem)); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vld3qb (operands[0], mem, operands[0])); + DONE; +}) + +(define_insn "neon_vld3qa" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); + return ""; +} + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "neon_vld3qb" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); + return ""; +} + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:EI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + ops[4] = operands[3]; + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", + ops); + return ""; +} + [(set_attr "type" "neon_load3_one_lane")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + 
UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + ops[4] = GEN_INT (lane); + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", + ops); + return ""; +} + [(set_attr "type" "neon_load3_one_lane")] +) + +(define_insn "neon_vld3_dup" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0[], %P1[], %P2[]}, %3", ops); + return ""; + } + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load3_all_lanes") + (const_string "neon_load1_1reg")))]) + +(define_expand "vec_store_lanesei" + [(set (match_operand:EI 0 "neon_struct_operand") + (unspec:EI [(match_operand:EI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON") + +(define_insn "neon_vst3" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst3.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_3reg") + (const_string "neon_store3_one_lane")))]) + +(define_expand "vec_store_lanesci" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst3 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vst3" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[0], EImode, 0); + emit_insn (gen_neon_vst3qa (mem, operands[1])); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vst3qb (mem, operands[1])); + DONE; +}) + +(define_insn "neon_vst3qa" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "neon_vst3qb" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3B))] + 
"TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "neon_vst3_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = operands[2]; + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", + ops); + return ""; +} + [(set_attr "type" "neon_store3_one_lane")] +) + +(define_insn "neon_vst3_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:CI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = GEN_INT (lane); + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", + ops); + return ""; +} + [(set_attr "type" "neon_store3_one_lane")] +) + +(define_expand "vec_load_lanesoi" + [(set (match_operand:OI 0 "s_register_operand") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON") + +(define_insn "neon_vld4" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld4.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_4reg") + (const_string "neon_load4_4reg")))] +) + +(define_expand "vec_load_lanesxi" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld4 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vld4" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[1], OImode, 0); + emit_insn (gen_neon_vld4qa (operands[0], mem)); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vld4qb (operands[0], mem, operands[0])); + DONE; +}) + +(define_insn "neon_vld4qa" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:OI 1 
"neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; +} + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "neon_vld4qb" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 14); + ops[4] = operands[1]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; +} + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + ops[5] = operands[3]; + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; +} + [(set_attr "type" "neon_load4_one_lane")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + ops[5] = GEN_INT (lane); + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; +} + [(set_attr "type" "neon_load4_one_lane")] +) + +(define_insn "neon_vld4_dup" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + 
output_asm_insn ("vld4.\t{%P0[], %P1[], %P2[], %P3[]}, %A4", + ops); + return ""; + } + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load4_all_lanes") + (const_string "neon_load1_1reg")))] +) + +(define_expand "vec_store_lanesoi" + [(set (match_operand:OI 0 "neon_struct_operand") + (unspec:OI [(match_operand:OI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON") + +(define_insn "neon_vst4" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst4.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_4reg") + (const_string "neon_store4_4reg")))] +) + +(define_expand "vec_store_lanesxi" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst4 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vst4" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[0], OImode, 0); + emit_insn (gen_neon_vst4qa (mem, operands[1])); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vst4qb (mem, operands[1])); + DONE; +}) + +(define_insn "neon_vst4qa" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "neon_vst4qb" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + ops[4] = gen_rtx_REG (DImode, regno + 14); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "neon_vst4_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG 
(DImode, regno + 6); + ops[5] = operands[2]; + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; +} + [(set_attr "type" "neon_store4_one_lane")] +) + +(define_insn "neon_vst4_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:XI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + ops[5] = GEN_INT (lane); + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_expand "neon_vand" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_and3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorr" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_veor" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vbic" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_bic3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorn" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "neon_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovl. %q0, %e1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovl. 
%q0, %f1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand"))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_hi_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_lo_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_insn "neon_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmull. %q0, %e1, %e3" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_lo_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + } +) + +(define_insn "neon_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmull. %q0, %f1, %f3" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (/2 + i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_hi_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + + } +) + +(define_insn "neon_vec_shiftl_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (ashift:VW (match_operand:VW 1 "register_operand" "w") + (match_operand: 2 "const_neon_scalar_shift_amount_operand" ""))))] + "TARGET_NEON" +{ + return "vshll. 
%q0, %P1, %2"; +} + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_widen_shiftl_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + emit_insn (gen_neon_vec_shiftl_ (operands[0], + simplify_gen_subreg (mode, operands[1], mode, 0), + operands[2])); + DONE; + } +) + +(define_expand "vec_widen_shiftl_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + emit_insn (gen_neon_vec_shiftl_ (operands[0], + simplify_gen_subreg (mode, operands[1], mode, + GET_MODE_SIZE (mode)), + operands[2])); + DONE; + } +) + +;; Vectorize for non-neon-quad case +(define_insn "neon_unpack_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (match_operand:VDI 1 "register_operand" "w")))] + "TARGET_NEON" + "vmovl. %q0, %P1" + [(set_attr "type" "neon_move")] +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; +} +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; +} +) + +(define_insn "neon_vec_mult_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: + (match_operand:VDI 1 "register_operand" "w")) + (SE: + (match_operand:VDI 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmull. 
%q0, %P1, %P2" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_shiftl_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + } +) + +(define_expand "vec_widen_shiftl_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + } +) + +; FIXME: These instruction patterns can't be used safely in big-endian mode +; because the ordering of vector elements in Q registers is different from what +; the semantics of the instructions require. + +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (truncate: + (match_operand:VN 1 "register_operand" "w")) + (truncate: + (match_operand:VN 2 "register_operand" "w"))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovn.i\t%e0, %q1\;vmovn.i\t%f0, %q2" + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +;; For the non-quad case. +(define_insn "neon_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VN 1 "register_operand" "w")))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovn.i\t%P0, %q1" + [(set_attr "type" "neon_move_narrow_q")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VSHFT 1 "register_operand" "") + (match_operand:VSHFT 2 "register_operand")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx tempreg = gen_reg_rtx (mode); + + emit_insn (gen_move_lo_quad_ (tempreg, operands[1])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[2])); + emit_insn (gen_neon_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) + +(define_insn "neon_vabd_2" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w"))))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vabd. 
%0, %1, %2" + [(set (attr "type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + +(define_insn "neon_vabd_3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")] + UNSPEC_VSUB)))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vabd. %0, %1, %2" + [(set (attr "type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + +;; Copy from core-to-neon regs, then extend, not vice-versa + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) diff --git a/gcc-4.9/gcc/config/arm/neon.ml b/gcc-4.9/gcc/config/arm/neon.ml new file mode 100644 index 000000000..4289b8ce0 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon.ml @@ -0,0 +1,2355 @@ +(* Common code for ARM NEON header file, documentation and test case + generators. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . *) + +(* Shorthand types for vector elements. *) +type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16 + | P64 | P128 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts + | Cast of elts * elts | NoElts + +type eltclass = Signed | Unsigned | Float | Poly | Int | Bits + | ConvClass of eltclass * eltclass | NoType + +(* These vector types correspond directly to C types. *) +type vectype = T_int8x8 | T_int8x16 + | T_int16x4 | T_int16x8 + | T_int32x2 | T_int32x4 + | T_int64x1 | T_int64x2 + | T_uint8x8 | T_uint8x16 + | T_uint16x4 | T_uint16x8 + | T_uint32x2 | T_uint32x4 + | T_uint64x1 | T_uint64x2 + | T_float16x4 + | T_float32x2 | T_float32x4 + | T_poly8x8 | T_poly8x16 + | T_poly16x4 | T_poly16x8 + | T_immediate of int * int + | T_int8 | T_int16 + | T_int32 | T_int64 + | T_uint8 | T_uint16 + | T_uint32 | T_uint64 + | T_poly8 | T_poly16 + | T_poly64 | T_poly64x1 + | T_poly64x2 | T_poly128 + | T_float16 | T_float32 + | T_arrayof of int * vectype + | T_ptrto of vectype | T_const of vectype + | T_void | T_intQI + | T_intHI | T_intSI + | T_intDI | T_intTI + | T_floatHF | T_floatSF + +(* The meanings of the following are: + TImode : "Tetra", two registers (four words). + EImode : "hExa", three registers (six words). + OImode : "Octa", four registers (eight words). + CImode : "dodeCa", six registers (twelve words). + XImode : "heXadeca", eight registers (sixteen words). +*) + +type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode + +type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt + | PtrTo of shape_elt | CstPtrTo of shape_elt + (* These next ones are used only in the test generator. *) + | Element_of_dreg (* Used for "lane" variants. *) + | Element_of_qreg (* Likewise. *) + | All_elements_of_dreg (* Used for "dup" variants. *) + | Alternatives of shape_elt list (* Used for multiple valid operands *) + +type shape_form = All of int * shape_elt + | Long + | Long_noreg of shape_elt + | Wide + | Wide_noreg of shape_elt + | Narrow + | Long_imm + | Narrow_imm + | Binary_imm of shape_elt + | Use_operands of shape_elt array + | By_scalar of shape_elt + | Unary_scalar of shape_elt + | Wide_lane + | Wide_scalar + | Pair_result of shape_elt + +type arity = Arity0 of vectype + | Arity1 of vectype * vectype + | Arity2 of vectype * vectype * vectype + | Arity3 of vectype * vectype * vectype * vectype + | Arity4 of vectype * vectype * vectype * vectype * vectype + +type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI + | V16QI | V8HI | V4SI | V4SF | V2DI | TI + | QI | HI | SI | SF + +type opcode = + (* Binary ops. *) + Vadd + | Vmul + | Vmla + | Vmls + | Vfma + | Vfms + | Vsub + | Vceq + | Vcge + | Vcgt + | Vcle + | Vclt + | Vcage + | Vcagt + | Vcale + | Vcalt + | Vtst + | Vabd + | Vaba + | Vmax + | Vmin + | Vpadd + | Vpada + | Vpmax + | Vpmin + | Vrecps + | Vrsqrts + | Vshl + | Vshr_n + | Vshl_n + | Vsra_n + | Vsri + | Vsli + (* Logic binops. 
*) + | Vand + | Vorr + | Veor + | Vbic + | Vorn + | Vbsl + (* Ops with scalar. *) + | Vmul_lane + | Vmla_lane + | Vmls_lane + | Vmul_n + | Vmla_n + | Vmls_n + | Vmull_n + | Vmull_lane + | Vqdmull_n + | Vqdmull_lane + | Vqdmulh_n + | Vqdmulh_lane + (* Unary ops. *) + | Vrintn + | Vrinta + | Vrintp + | Vrintm + | Vrintz + | Vabs + | Vneg + | Vcls + | Vclz + | Vcnt + | Vrecpe + | Vrsqrte + | Vmvn + (* Vector extract. *) + | Vext + (* Reverse elements. *) + | Vrev64 + | Vrev32 + | Vrev16 + (* Transposition ops. *) + | Vtrn + | Vzip + | Vuzp + (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *) + | Vldx of int + | Vstx of int + | Vldx_lane of int + | Vldx_dup of int + | Vstx_lane of int + (* Set/extract lanes from a vector. *) + | Vget_lane + | Vset_lane + (* Initialize vector from bit pattern. *) + | Vcreate + (* Set all lanes to same value. *) + | Vdup_n + | Vmov_n (* Is this the same? *) + (* Duplicate scalar to all lanes of vector. *) + | Vdup_lane + (* Combine vectors. *) + | Vcombine + (* Get quadword high/low parts. *) + | Vget_high + | Vget_low + (* Convert vectors. *) + | Vcvt + | Vcvt_n + (* Narrow/lengthen vectors. *) + | Vmovn + | Vmovl + (* Table lookup. *) + | Vtbl of int + | Vtbx of int + (* Reinterpret casts. *) + | Vreinterp + +let rev_elems revsize elsize nelts _ = + let mask = (revsize / elsize) - 1 in + let arr = Array.init nelts + (fun i -> i lxor mask) in + Array.to_list arr + +let permute_range i stride nelts increment = + let rec build i = function + 0 -> [] + | nelts -> i :: (i + stride) :: build (i + increment) (pred nelts) in + build i nelts + +(* Generate a list of integers suitable for vzip. *) +let zip_range i stride nelts = permute_range i stride nelts 1 + +(* Generate a list of integers suitable for vunzip. *) +let uzip_range i stride nelts = permute_range i stride nelts 4 + +(* Generate a list of integers suitable for trn. *) +let trn_range i stride nelts = permute_range i stride nelts 2 + +let zip_elems _ nelts part = + match part with + `lo -> zip_range 0 nelts (nelts / 2) + | `hi -> zip_range (nelts / 2) nelts (nelts / 2) + +let uzip_elems _ nelts part = + match part with + `lo -> uzip_range 0 2 (nelts / 2) + | `hi -> uzip_range 1 2 (nelts / 2) + +let trn_elems _ nelts part = + match part with + `lo -> trn_range 0 nelts (nelts / 2) + | `hi -> trn_range 1 nelts (nelts / 2) + +(* Features used for documentation, to distinguish between some instruction + variants, and to signal special requirements (e.g. swapping arguments). *) + +type features = + Halving + | Rounding + | Saturating + | Dst_unsign + | High_half + | Doubling + | Flipped of string (* Builtin name to use with flipped arguments. *) + | InfoWord (* Pass an extra word for signage/rounding etc. (always passed + for All _, Long, Wide, Narrow shape_forms. *) + (* Implement builtin as shuffle. The parameter is a function which returns + masks suitable for __builtin_shuffle: arguments are (element size, + number of elements, high/low part selector). *) + | Use_shuffle of (int -> int -> [`lo|`hi] -> int list) + (* A specification as to the shape of instruction expected upon + disassembly, used if it differs from the shape used to build the + intrinsic prototype. Multiple entries in the constructor's argument + indicate that the intrinsic expands to more than one assembly + instruction, each with a corresponding shape specified here. *) + | Disassembles_as of shape_form list + | Builtin_name of string (* Override the name of the builtin. *) + (* Override the name of the instruction. 
If more than one name + is specified, it means that the instruction can have any of those + names. *) + | Instruction_name of string list + (* Mark that the intrinsic yields no instructions, or expands to yield + behavior that the test generator cannot test. *) + | No_op + (* Mark that the intrinsic has constant arguments that cannot be set + to the defaults (zero for pointers and one otherwise) in the test + cases. The function supplied must return the integer to be written + into the testcase for the argument number (0-based) supplied to it. *) + | Const_valuator of (int -> int) + | Fixed_vector_reg + | Fixed_core_reg + (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) + | Requires_feature of string + (* Mark that the intrinsic requires a particular architecture version. *) + | Requires_arch of int + (* Mark that the intrinsic requires a particular bit in __ARM_FP to + be set. *) + | Requires_FP_bit of int + +exception MixedMode of elts * elts + +let rec elt_width = function + S8 | U8 | P8 | I8 | B8 -> 8 + | S16 | U16 | P16 | I16 | B16 | F16 -> 16 + | S32 | F32 | U32 | I32 | B32 -> 32 + | S64 | U64 | P64 | I64 | B64 -> 64 + | P128 -> 128 + | Conv (a, b) -> + let wa = elt_width a and wb = elt_width b in + if wa = wb then wa else raise (MixedMode (a, b)) + | Cast (a, b) -> raise (MixedMode (a, b)) + | NoElts -> failwith "No elts" + +let rec elt_class = function + S8 | S16 | S32 | S64 -> Signed + | U8 | U16 | U32 | U64 -> Unsigned + | P8 | P16 | P64 | P128 -> Poly + | F16 | F32 -> Float + | I8 | I16 | I32 | I64 -> Int + | B8 | B16 | B32 | B64 -> Bits + | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) + | NoElts -> NoType + +let elt_of_class_width c w = + match c, w with + Signed, 8 -> S8 + | Signed, 16 -> S16 + | Signed, 32 -> S32 + | Signed, 64 -> S64 + | Float, 16 -> F16 + | Float, 32 -> F32 + | Unsigned, 8 -> U8 + | Unsigned, 16 -> U16 + | Unsigned, 32 -> U32 + | Unsigned, 64 -> U64 + | Poly, 8 -> P8 + | Poly, 16 -> P16 + | Poly, 64 -> P64 + | Poly, 128 -> P128 + | Int, 8 -> I8 + | Int, 16 -> I16 + | Int, 32 -> I32 + | Int, 64 -> I64 + | Bits, 8 -> B8 + | Bits, 16 -> B16 + | Bits, 32 -> B32 + | Bits, 64 -> B64 + | _ -> failwith "Bad element type" + +(* Return unsigned integer element the same width as argument. *) +let unsigned_of_elt elt = + elt_of_class_width Unsigned (elt_width elt) + +let signed_of_elt elt = + elt_of_class_width Signed (elt_width elt) + +(* Return untyped bits element the same width as argument. *) +let bits_of_elt elt = + elt_of_class_width Bits (elt_width elt) + +let non_signed_variant = function + S8 -> I8 + | S16 -> I16 + | S32 -> I32 + | S64 -> I64 + | U8 -> I8 + | U16 -> I16 + | U32 -> I32 + | U64 -> I64 + | x -> x + +let poly_unsigned_variant v = + let elclass = match elt_class v with + Poly -> Unsigned + | x -> x in + elt_of_class_width elclass (elt_width v) + +let widen_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w * 2) + +let narrow_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w / 2) + +(* If we're trying to find a mode from a "Use_operands" instruction, use the + last vector operand as the dominant mode used to invoke the correct builtin. + We must stick to this rule in neon.md. 
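+(* Editor's note: the short usage sketch below is not part of the original
+   GCC source; it is added purely as illustration and only exercises the
+   element-type helpers already defined above. *)
+let () =
+  assert (widen_elt S8 = S16);           (* vaddl widens each element *)
+  assert (narrow_elt U64 = U32);         (* vmovn narrows each element *)
+  assert (unsigned_of_elt S32 = U32);    (* comparisons return unsigned masks *)
+  assert (bits_of_elt P16 = B16);        (* "bits only" ops drop the class *)
+  assert (non_signed_variant U16 = I16)  (* sign-invariant ops share a builtin *)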
*) +let find_key_operand operands = + let rec scan opno = + match operands.(opno) with + Qreg -> Qreg + | Dreg -> Dreg + | VecArray (_, Qreg) -> Qreg + | VecArray (_, Dreg) -> Dreg + | _ -> scan (opno-1) + in + scan ((Array.length operands) - 1) + +(* Find a vecmode from a shape_elt ELT for an instruction with shape_form + SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode + for the given argument position, else determine which argument to return a + mode for automatically. *) + +let rec mode_of_elt ?argpos elt shape = + let flt = match elt_class elt with + Float | ConvClass(_, Float) -> true | _ -> false in + let idx = + match elt_width elt with + 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 | 128 -> 4 + | _ -> failwith "Bad element width" + in match shape with + All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg + | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> + if flt then + [| V8QI; V4HF; V2SF; DI |].(idx) + else + [| V8QI; V4HI; V2SI; DI |].(idx) + | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg + | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> + [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI; TI|].(idx) + | All (_, (Corereg | PtrTo _ | CstPtrTo _)) -> + [| QI; HI; if flt then SF else SI; DI |].(idx) + | Long | Wide | Wide_lane | Wide_scalar + | Long_imm -> + [| V8QI; V4HI; V2SI; DI |].(idx) + | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) + | Use_operands ops -> + begin match argpos with + None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops))) + | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos))) + end + | _ -> failwith "invalid shape" + +(* Modify an element type dependent on the shape of the instruction and the + operand number. *) + +let shapemap shape no = + let ident = fun x -> x in + match shape with + All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _ + | Binary_imm _ -> ident + | Long | Long_noreg _ | Wide_scalar | Long_imm -> + [| widen_elt; ident; ident |].(no) + | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no) + | Wide_lane -> [| widen_elt; ident; ident; ident |].(no) + | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no) + +(* Register type (D/Q) of an operand, based on shape and operand number. 
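+(* Editor's note: an illustrative sketch, not in the original source, of what
+   mode_of_elt and shapemap (defined above) compute for two common cases. *)
+let () =
+  (* vadd.i16 on D registers works in V4HI mode; vaddq.f32 in V4SF mode. *)
+  assert (mode_of_elt S16 (All (3, Dreg)) = V4HI);
+  assert (mode_of_elt F32 (All (3, Qreg)) = V4SF);
+  (* For a Long shape (e.g. vaddl) the result element is widened ... *)
+  assert ((shapemap Long 0) S8 = S16);
+  (* ... while the source operands keep their element type. *)
+  assert ((shapemap Long 1) S8 = S8)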
*) + +let regmap shape no = + match shape with + All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg + | Long -> [| Qreg; Dreg; Dreg |].(no) + | Wide -> [| Qreg; Qreg; Dreg |].(no) + | Narrow -> [| Dreg; Qreg; Qreg |].(no) + | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no) + | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no) + | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no) + | Unary_scalar reg -> [| reg; Dreg; Immed |].(no) + | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no) + | Binary_imm reg -> [| reg; reg; Immed |].(no) + | Long_imm -> [| Qreg; Dreg; Immed |].(no) + | Narrow_imm -> [| Dreg; Qreg; Immed |].(no) + | Use_operands these -> these.(no) + +let type_for_elt shape elt no = + let elt = (shapemap shape no) elt in + let reg = regmap shape no in + let rec type_for_reg_elt reg elt = + match reg with + Dreg -> + begin match elt with + S8 -> T_int8x8 + | S16 -> T_int16x4 + | S32 -> T_int32x2 + | S64 -> T_int64x1 + | U8 -> T_uint8x8 + | U16 -> T_uint16x4 + | U32 -> T_uint32x2 + | U64 -> T_uint64x1 + | P64 -> T_poly64x1 + | P128 -> T_poly128 + | F16 -> T_float16x4 + | F32 -> T_float32x2 + | P8 -> T_poly8x8 + | P16 -> T_poly16x4 + | _ -> failwith "Bad elt type for Dreg" + end + | Qreg -> + begin match elt with + S8 -> T_int8x16 + | S16 -> T_int16x8 + | S32 -> T_int32x4 + | S64 -> T_int64x2 + | U8 -> T_uint8x16 + | U16 -> T_uint16x8 + | U32 -> T_uint32x4 + | U64 -> T_uint64x2 + | F32 -> T_float32x4 + | P8 -> T_poly8x16 + | P16 -> T_poly16x8 + | P64 -> T_poly64x2 + | P128 -> T_poly128 + | _ -> failwith "Bad elt type for Qreg" + end + | Corereg -> + begin match elt with + S8 -> T_int8 + | S16 -> T_int16 + | S32 -> T_int32 + | S64 -> T_int64 + | U8 -> T_uint8 + | U16 -> T_uint16 + | U32 -> T_uint32 + | U64 -> T_uint64 + | P8 -> T_poly8 + | P16 -> T_poly16 + | P64 -> T_poly64 + | P128 -> T_poly128 + | F32 -> T_float32 + | _ -> failwith "Bad elt type for Corereg" + end + | Immed -> + T_immediate (0, 0) + | VecArray (num, sub) -> + T_arrayof (num, type_for_reg_elt sub elt) + | PtrTo x -> + T_ptrto (type_for_reg_elt x elt) + | CstPtrTo x -> + T_ptrto (T_const (type_for_reg_elt x elt)) + (* Anything else is solely for the use of the test generator. *) + | _ -> assert false + in + type_for_reg_elt reg elt + +(* Return size of a vector type, in bits. *) +let vectype_size = function + T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 + | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 + | T_float32x2 | T_poly8x8 | T_poly64x1 | T_poly16x4 | T_float16x4 -> 64 + | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 + | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 + | T_float32x4 | T_poly8x16 | T_poly64x2 | T_poly16x8 -> 128 + | _ -> raise Not_found + +let inttype_for_array num elttype = + let eltsize = vectype_size elttype in + let numwords = (num * eltsize) / 32 in + match numwords with + 4 -> B_TImode + | 6 -> B_EImode + | 8 -> B_OImode + | 12 -> B_CImode + | 16 -> B_XImode + | _ -> failwith ("no int type for size " ^ string_of_int numwords) + +(* These functions return pairs of (internal, external) types, where "internal" + types are those seen by GCC, and "external" are those seen by the assembler. + These types aren't necessarily the same, since the intrinsics can munge more + than one C type into each assembler opcode. *) + +let make_sign_invariant func shape elt = + let arity, elt' = func shape elt in + arity, non_signed_variant elt' + +(* Don't restrict any types. 
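+(* Editor's note: a usage sketch added for illustration only (not in the
+   original source), showing how type_for_elt and inttype_for_array, defined
+   above, resolve C-level and big-integer types. *)
+let () =
+  (* Operand 0 of a D-register vadd on unsigned bytes is uint8x8_t. *)
+  assert (type_for_elt (All (3, Dreg)) U8 0 = T_uint8x8);
+  (* A Pair_result shape (vtrn/vzip/vuzp) yields an array of two vectors;
+     two 64-bit vectors occupy four words, hence TImode. *)
+  assert (type_for_elt (Pair_result Dreg) S32 0 = T_arrayof (2, T_int32x2));
+  assert (inttype_for_array 2 T_int32x2 = B_TImode)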
*) + +let elts_same make_arity shape elt = + let vtype = type_for_elt shape elt in + make_arity vtype, elt + +(* As sign_invar_*, but when sign matters. *) +let elts_same_io_lane = + elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_io = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2)) + +let elts_same_2_lane = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_3 = elts_same_2_lane + +let elts_same_2 = + elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2)) + +let elts_same_1 = + elts_same (fun vtype -> Arity1 (vtype 0, vtype 1)) + +(* Use for signed/unsigned invariant operations (i.e. where the operation + doesn't depend on the sign of the data. *) + +let sign_invar_io_lane = make_sign_invariant elts_same_io_lane +let sign_invar_io = make_sign_invariant elts_same_io +let sign_invar_2_lane = make_sign_invariant elts_same_2_lane +let sign_invar_2 = make_sign_invariant elts_same_2 +let sign_invar_1 = make_sign_invariant elts_same_1 + +(* Sign-sensitive comparison. *) + +let cmp_sign_matters shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + Arity2 (rtype, vtype 1, vtype 2), elt + +(* Signed/unsigned invariant comparison. *) + +let cmp_sign_invar shape elt = + let shape', elt' = cmp_sign_matters shape elt in + let elt'' = + match non_signed_variant elt' with + P8 -> I8 + | x -> x + in + shape', elt'' + +(* Comparison (VTST) where only the element width matters. *) + +let cmp_bits shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 + and bits_only = bits_of_elt elt in + Arity2 (rtype, vtype 1, vtype 2), bits_only + +let reg_shift shape elt = + let vtype = type_for_elt shape elt + and op2type = type_for_elt shape (signed_of_elt elt) 2 in + Arity2 (vtype 0, vtype 1, op2type), elt + +(* Genericised constant-shift type-generating function. *) + +let const_shift mkimm ?arity ?result shape elt = + let op2type = (shapemap shape 2) elt in + let op2width = elt_width op2type in + let op2 = mkimm op2width + and op1 = type_for_elt shape elt 1 + and r_elt = + match result with + None -> elt + | Some restriction -> restriction elt in + let rtype = type_for_elt shape r_elt 0 in + match arity with + None -> Arity2 (rtype, op1, op2), elt + | Some mkarity -> mkarity rtype op1 op2, elt + +(* Use for immediate right-shifts. *) + +let shift_right shape elt = + const_shift (fun imm -> T_immediate (1, imm)) shape elt + +let shift_right_acc shape elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt + +(* Use for immediate right-shifts when the operation doesn't care about + signedness. *) + +let shift_right_sign_invar = + make_sign_invariant shift_right + +(* Immediate right-shift; result is unsigned even when operand is signed. *) + +let shift_right_to_uns shape elt = + const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift. *) + +let shift_left shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt + +(* Immediate left-shift, unsigned result. *) + +let shift_left_to_uns shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift, don't care about signs. *) + +let shift_left_sign_invar = + make_sign_invariant shift_left + +(* Shift left/right and insert: only element size matters. 
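+(* Editor's note: a sketch added for illustration, not part of the original
+   source, showing the immediate ranges encoded by the constant-shift
+   generators defined above. *)
+let () =
+  (* vshr_n on 16-bit D-register elements accepts shift amounts 1..16:
+     int16x4_t -> immediate -> int16x4_t. *)
+  let arity, elt = shift_right (Binary_imm Dreg) S16 in
+  assert (elt = S16);
+  assert (arity = Arity2 (T_int16x4, T_int16x4, T_immediate (1, 16)));
+  (* vshl_n accepts 0..15 for the same element size. *)
+  let arity', _ = shift_left (Binary_imm Dreg) S16 in
+  assert (arity' = Arity2 (T_int16x4, T_int16x4, T_immediate (0, 15)))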
*) + +let shift_insert shape elt = + let arity, elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in + arity, bits_of_elt elt + +(* Get/set lane. *) + +let get_lane shape elt = + let vtype = type_for_elt shape elt in + Arity2 (vtype 0, vtype 1, vtype 2), + (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) + +let set_lane shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +let set_lane_notype shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts + +let create_vector shape elt = + let vtype = type_for_elt shape U64 1 + and rtype = type_for_elt shape elt 0 in + Arity1 (rtype, vtype), elt + +let conv make_arity shape elt = + let edest, esrc = match elt with + Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc + | _ -> failwith "Non-conversion element in conversion" in + let vtype = type_for_elt shape esrc + and rtype = type_for_elt shape edest 0 in + make_arity rtype vtype, elt + +let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1)) +let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2)) + +(* Operation has an unsigned result even if operands are signed. *) + +let dst_unsign make_arity shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + make_arity rtype vtype, elt + +let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1)) + +let make_bits_only func shape elt = + let arity, elt' = func shape elt in + arity, bits_of_elt elt' + +(* Extend operation. *) + +let extend shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned + integer ops respectively, or unsigned for polynomial ops. *) + +let table mkarity shape elt = + let vtype = type_for_elt shape elt in + let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in + mkarity vtype op2, bits_of_elt elt + +let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2)) +let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2)) + +(* Operations where only bits matter. *) + +let bits_1 = make_bits_only elts_same_1 +let bits_2 = make_bits_only elts_same_2 +let bits_3 = make_bits_only elts_same_3 + +(* Store insns. *) +let store_1 shape elt = + let vtype = type_for_elt shape elt in + Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt + +let store_3 shape elt = + let vtype = type_for_elt shape elt in + Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt + +let make_notype func shape elt = + let arity, _ = func shape elt in + arity, NoElts + +let notype_1 = make_notype elts_same_1 +let notype_2 = make_notype elts_same_2 +let notype_3 = make_notype elts_same_3 + +(* Bit-select operations (first operand is unsigned int). *) + +let bit_select shape elt = + let vtype = type_for_elt shape elt + and itype = type_for_elt shape (unsigned_of_elt elt) in + Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts + +(* Common lists of supported element types. 
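+(* Editor's note: an added illustration, not in the original source, of how
+   the conversion generator conv_1 above resolves a vcvt signature. *)
+let () =
+  (* vcvt.s32.f32 d0, d1 : float32x2_t -> int32x2_t.  The element is a Conv
+     pair whose first member names the destination type. *)
+  let arity, elt = conv_1 (All (2, Dreg)) (Conv (S32, F32)) in
+  assert (elt = Conv (S32, F32));
+  assert (arity = Arity1 (T_int32x2, T_float32x2))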
*) + +let s_8_32 = [S8; S16; S32] +let u_8_32 = [U8; U16; U32] +let su_8_32 = [S8; S16; S32; U8; U16; U32] +let su_8_64 = S64 :: U64 :: su_8_32 +let su_16_64 = [S16; S32; S64; U16; U32; U64] +let pf_su_8_16 = [P8; P16; S8; S16; U8; U16] +let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32 +let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 +let suf_32 = [S32; U32; F32] + +let ops = + [ + (* Addition. *) + Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32; + Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64]; + Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; + Vadd, [], Long, "vaddl", elts_same_2, su_8_32; + Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Dreg), "vRhadd", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; + Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; + Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; + Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; + Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], + Narrow, "vRaddhn", sign_invar_2, su_16_64; + + (* Multiplication. *) + Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", + elts_same_2, [S16; S32]; + Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Dreg), "vqRdmulh", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Qreg), "vqRdmulhQ", + elts_same_2, [S16; S32]; + Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; + Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; + + (* Multiply-accumulate. *) + Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; + Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; + Vmla, [], Long, "vmlal", elts_same_io, su_8_32; + Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; + + (* Multiply-subtract. *) + Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; + Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; + Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; + Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; + + (* Fused-multiply-accumulate. *) + Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32]; + Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32]; + Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32]; + Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32]; + + (* Round to integral. 
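+(* Editor's note: a sketch added for illustration only (not in the original
+   source) of what a single row of the ops table above denotes, using the
+   first Vadd entry and the generators defined earlier. *)
+let () =
+  (* The "vadd" row over F32 :: su_8_32 yields one intrinsic per element
+     type; vadd_f32 is float32x2_t -> float32x2_t -> float32x2_t. *)
+  let arity, elt = sign_invar_2 (All (3, Dreg)) F32 in
+  assert (arity = Arity2 (T_float32x2, T_float32x2, T_float32x2));
+  assert (elt = F32);
+  (* Sign-invariant generators strip the sign, so vadd_s8 and vadd_u8 both
+     map onto the same untyped 8-bit builtin. *)
+  let _, elt_s8 = sign_invar_2 (All (3, Dreg)) S8 in
+  assert (elt_s8 = I8)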
*) + Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndn", elts_same_1, [F32]; + Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqn", elts_same_1, [F32]; + Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrnda", elts_same_1, [F32]; + Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqa", elts_same_1, [F32]; + Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndp", elts_same_1, [F32]; + Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqp", elts_same_1, [F32]; + Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndm", elts_same_1, [F32]; + Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqm", elts_same_1, [F32]; + Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrnd", elts_same_1, [F32]; + Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndq", elts_same_1, [F32]; + (* Subtraction. *) + Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; + Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; + Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; + Vsub, [], Long, "vsubl", elts_same_2, su_8_32; + Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; + Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; + Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; + Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; + Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], + Narrow, "vRsubhn", sign_invar_2, su_16_64; + + (* Comparison, equal. *) + Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; + Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; + + (* Comparison, greater-than or equal. *) + Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], + All (3, Dreg), "vcge", cmp_sign_matters, + u_8_32; + Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], + All (3, Qreg), "vcgeQ", cmp_sign_matters, + u_8_32; + + (* Comparison, less-than or equal. *) + Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, + F32 :: s_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"], + All (3, Dreg), "vcle", cmp_sign_matters, + u_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + F32 :: s_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + u_8_32; + + (* Comparison, greater-than. *) + Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], + All (3, Dreg), "vcgt", cmp_sign_matters, + u_8_32; + Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], + All (3, Qreg), "vcgtQ", cmp_sign_matters, + u_8_32; + + (* Comparison, less-than. 
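+(* Editor's note: an added illustration, not part of the original source, of
+   the comparison typing used in the rows above. *)
+let () =
+  (* vcge.s32 returns an all-ones/all-zeros mask typed as the unsigned
+     vector of the same width: int32x2_t x int32x2_t -> uint32x2_t. *)
+  let arity, _ = cmp_sign_matters (All (3, Dreg)) S32 in
+  assert (arity = Arity2 (T_uint32x2, T_int32x2, T_int32x2))
+(* The [Flipped "vcge"] feature on the vcle rows records that no separate
+   instruction exists: the named builtin is used with its arguments swapped. *)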
*) + Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, + F32 :: s_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"], + All (3, Dreg), "vclt", cmp_sign_matters, + u_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + F32 :: s_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + u_8_32; + + (* Compare absolute greater-than or equal. *) + Vcage, [Instruction_name ["vacge"]], + All (3, Dreg), "vcage", cmp_sign_matters, [F32]; + Vcage, [Instruction_name ["vacge"]], + All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], + All (3, Dreg), "vcale", cmp_sign_matters, [F32]; + Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], + All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; + + (* Compare absolute greater-than or equal. *) + Vcagt, [Instruction_name ["vacgt"]], + All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; + Vcagt, [Instruction_name ["vacgt"]], + All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], + All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], + All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; + + (* Test bits. *) + Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; + Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; + + (* Absolute difference. *) + Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; + Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; + Vabd, [], Long, "vabdl", elts_same_2, su_8_32; + + (* Absolute difference and accumulate. *) + Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; + Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; + Vaba, [], Long, "vabal", elts_same_io, su_8_32; + + (* Max. *) + Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; + Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; + + (* Min. *) + Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; + Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; + + (* Pairwise add. *) + Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; + Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; + Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; + + (* Pairwise add, widen and accumulate. *) + Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; + Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; + + (* Folding maximum, minimum. *) + Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; + Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; + + (* Reciprocal step. *) + Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; + Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; + Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; + Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; + + (* Vector shift left. 
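+(* Editor's note: a minimal sketch, added for illustration and not in the
+   original source, of the register-shift typing used by the vshl rows
+   below. *)
+let () =
+  (* vshl.u64: the data is unsigned but the per-lane shift count is always
+     a signed vector: uint64x1_t -> int64x1_t -> uint64x1_t. *)
+  let arity, elt = reg_shift (All (3, Dreg)) U64 in
+  assert (elt = U64);
+  assert (arity = Arity2 (T_uint64x1, T_uint64x1, T_int64x1))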
*) + Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; + Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Dreg), "vRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Qreg), "vRshlQ", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Dreg), "vqRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; + + (* Vector shift right by constant. *) + Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; + Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, + "vRshr_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, + "vRshrQ_n", shift_right, su_8_64; + Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; + Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", + shift_right_sign_invar, su_16_64; + Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; + Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, + "vqRshrn_n", shift_right, su_16_64; + Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", + shift_right_to_uns, [S16; S32; S64]; + Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], + Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; + + (* Vector shift left by constant. *) + Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; + Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; + Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; + Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; + + (* Vector shift right by constant and accumulate. *) + Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; + Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, + "vRsra_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, + "vRsraQ_n", shift_right_acc, su_8_64; + + (* Vector shift right and insert. *) + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + [P64]; + Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + [P64]; + Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Vector shift left and insert. 
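+(* Editor's note: an added sketch, not part of the original source, of the
+   accumulating-shift typing used by the vsra_n rows above. *)
+let () =
+  (* vsra_n adds the shifted value into its first operand, so the
+     destination type also appears as the first argument, and the shift
+     amount ranges over 1..32 for 32-bit elements. *)
+  let arity, _ = shift_right_acc (Binary_imm Dreg) U32 in
+  assert (arity
+          = Arity3 (T_uint32x2, T_uint32x2, T_uint32x2, T_immediate (1, 32)))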
*) + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + [P64]; + Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + [P64]; + Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Absolute value. *) + Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; + Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; + Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; + Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; + + (* Negate. *) + Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; + Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; + Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; + Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; + + (* Bitwise not. *) + Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; + Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; + + (* Count leading sign bits. *) + Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; + Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; + + (* Count leading zeros. *) + Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; + Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; + + (* Count number of set bits. *) + Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; + Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; + + (* Reciprocal estimate. *) + Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; + Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; + + (* Reciprocal square-root estimate. *) + Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; + Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; + + (* Get lanes from a vector. *) + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", get_lane, pf_su_8_32; + Vget_lane, + [No_op; + InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", notype_2, [S64; U64]; + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", get_lane, pf_su_8_32; + Vget_lane, + [InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0); + Fixed_core_reg], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", notype_2, [S64; U64]; + + (* Set lanes in a vector. 
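+(* Editor's note: a usage sketch added for illustration (not in the original
+   source) of the get_lane generator behind the rows above. *)
+let () =
+  (* vget_lane on poly8x8_t returns a scalar poly8_t in a core register,
+     but the recorded element is the unsigned variant ... *)
+  let arity, elt = get_lane (Use_operands [| Corereg; Dreg; Immed |]) P8 in
+  assert (arity = Arity2 (T_poly8, T_poly8x8, T_immediate (0, 0)));
+  assert (elt = U8);
+  (* ... while 32-bit lanes (signed, unsigned or float) all share the
+     untyped 32-bit extraction. *)
+  let _, elt32 = get_lane (Use_operands [| Corereg; Dreg; Immed |]) F32 in
+  assert (elt32 = B32)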
*) + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane, pf_su_8_32; + Vset_lane, [No_op; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane_notype, [S64; U64]; + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane, pf_su_8_32; + Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane_notype, [S64; U64]; + + (* Create vector from literal bit pattern. *) + Vcreate, + [Requires_feature "CRYPTO"; No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + [P64]; + Vcreate, + [No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + pf_su_8_64; + + (* Set all lanes to the same value. *) + Vdup_n, + [Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [P64]; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [S64; U64]; + Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + [P64]; + Vdup_n, + [Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, + [S64; U64]; + + (* These are just aliases for the above. *) + Vmov_n, + [Builtin_name "vdup_n"; + Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdup_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", notype_1, [S64; U64]; + Vmov_n, + [Builtin_name "vdupQ_n"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdupQ_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", notype_1, [S64; U64]; + + (* Duplicate, lane version. 
We can't use Use_operands here because the + rightmost register (always Dreg) would be picked up by find_key_operand, + when we want the leftmost register to be used in this case (otherwise + the modes are indistinguishable in neon.md, etc. *) + Vdup_lane, + [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], + Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [P64]; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; + Vdup_lane, + [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], + Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [P64]; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; + + (* Combining vectors. *) + Vcombine, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + [P64]; + Vcombine, [No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + pf_su_8_64; + + (* Splitting vectors. *) + Vget_high, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, [P64]; + Vget_high, [No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, pf_su_8_64; + Vget_low, [Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Dreg |]]; + Fixed_vector_reg], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, pf_su_8_32; + Vget_low, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [P64]; + Vget_low, [No_op], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [S64; U64]; + + (* Conversions. *) + Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)]; + Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + + (* Move, narrowing. *) + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], + Narrow, "vmovn", sign_invar_1, su_16_64; + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], + Narrow, "vqmovn", elts_same_1, su_16_64; + Vmovn, + [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], + Narrow, "vqmovun", dst_unsign_1, + [S16; S32; S64]; + + (* Move, long. *) + Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], + Long, "vmovl", elts_same_1, su_8_32; + + (* Table lookup. 
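+(* Editor's note: a short sketch, added for illustration and not part of the
+   original source, of the table look-up typing used by the vtbl rows
+   below. *)
+let () =
+  (* vtbl2 on poly8 elements: the table is an array of two D registers
+     (poly8x8x2_t), the index vector is unsigned, and only the element
+     width is recorded. *)
+  let shape = Use_operands [| Dreg; VecArray (2, Dreg); Dreg |] in
+  let arity, elt = table_2 shape P8 in
+  assert (elt = B8);
+  assert (arity = Arity2 (T_poly8x8, T_arrayof (2, T_poly8x8), T_uint8x8))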
*) + Vtbl 1, + [Instruction_name ["vtbl"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; + Vtbl 2, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, + [U8; S8; P8]; + Vtbl 3, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, + [U8; S8; P8]; + Vtbl 4, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, + [U8; S8; P8]; + + (* Extended table lookup. *) + Vtbx 1, + [Instruction_name ["vtbx"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; + Vtbx 2, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, + [U8; S8; P8]; + Vtbx 3, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, + [U8; S8; P8]; + Vtbx 4, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, + [U8; S8; P8]; + + (* Multiply, lane. (note: these were undocumented at the time of + writing). *) + Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + + (* Multiply-accumulate, lane. *) + Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", + elts_same_io_lane, [S16; S32]; + + (* Multiply-subtract, lane. *) + Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", + elts_same_io_lane, [S16; S32]; + + (* Long multiply, lane. *) + Vmull_lane, [], + Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; + + (* Saturating doubling long multiply, lane. *) + Vqdmull_lane, [Saturating; Doubling], + Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; + + (* Saturating doubling long multiply high, lane. *) + Vqdmulh_lane, [Saturating; Halving], + By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving], + By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; + + (* Vector multiply by scalar. *) + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + + (* Vector long multiply by scalar. 
*) + Vmull_n, [Instruction_name ["vmull"]; + Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], + Wide_scalar, "vmull_n", + elts_same_2, [S16; S32; U16; U32]; + + (* Vector saturating doubling long multiply by scalar. *) + Vqdmull_n, [Saturating; Doubling; + Disassembles_as [Use_operands [| Qreg; Dreg; + Element_of_dreg |]]], + Wide_scalar, "vqdmull_n", + elts_same_2, [S16; S32]; + + (* Vector saturating doubling long multiply high by scalar. *) + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqdmulh_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqRdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqRdmulh_n", elts_same_2, [S16; S32]; + + (* Vector multiply-accumulate by scalar. *) + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; + Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, + [S16; S32]; + + (* Vector multiply subtract by scalar. *) + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; + Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, + [S16; S32]; + + (* Vector extract. *) + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + [P64]; + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + pf_su_8_64; + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + [P64]; + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + pf_su_8_64; + + (* Reverse elements. 
*) + Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1, + P8 :: P16 :: F32 :: su_8_32; + Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1, + P8 :: P16 :: F32 :: su_8_32; + Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1, + [P8; P16; S8; U8; S16; U16]; + Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1, + [P8; P16; S8; U8; S16; U16]; + Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1, + [P8; S8; U8]; + Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1, + [P8; S8; U8]; + + (* Bit selection. *) + Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + [P64]; + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + pf_su_8_64; + Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + [P64]; + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + pf_su_8_64; + + Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16; + Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32; + Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; + (* Zip elements. *) + Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16; + Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32; + Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; + + (* Unzip elements. *) + Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2, + pf_su_8_32; + Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2, + pf_su_8_32; + + (* Element/structure loads. VLD1 variants. 
*) + Vldx 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + [P64]; + Vldx 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + pf_su_8_64; + Vldx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + [P64]; + Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + pf_su_8_64; + + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [P64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [S64; U64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [P64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [S64; U64]; + + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, pf_su_8_32; + Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [P64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [S64; U64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, pf_su_8_32; + (* Treated identically to vld1_dup above as we now + do a single load followed by a duplicate. *) + Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [P64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [S64; U64]; + + (* VST1 variants. 
*) + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, [P64]; + Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, pf_su_8_64; + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, [P64]; + Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, pf_su_8_64; + + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [P64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [U64; S64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [P64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [U64; S64]; + + (* VLD2 variants. 
*) + Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, pf_su_8_32; + Vldx 2, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [P64]; + Vldx 2, [Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [S64; U64]; + Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], + "vld2Q", bits_1, pf_su_8_32; + + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; + VecArray (2, Dreg); Immed |], + "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; + VecArray (2, Qreg); Immed |], + "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 2, + [Disassembles_as [Use_operands + [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, pf_su_8_32; + Vldx_dup 2, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [P64]; + Vldx_dup 2, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [S64; U64]; + + (* VST2 variants. *) + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, pf_su_8_32; + Vstx 2, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [P64]; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [S64; U64]; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", + store_1, pf_su_8_32; + + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD3 variants. 
*) + Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, pf_su_8_32; + Vldx 3, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [P64]; + Vldx 3, [Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [S64; U64]; + Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], + "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; + VecArray (3, Dreg); Immed |], + "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; + VecArray (3, Qreg); Immed |], + "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 3, + [Disassembles_as [Use_operands + [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, pf_su_8_32; + Vldx_dup 3, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [P64]; + Vldx_dup 3, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [S64; U64]; + + (* VST3 variants. *) + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, pf_su_8_32; + Vstx 3, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [P64]; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [S64; U64]; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", + store_1, pf_su_8_32; + + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD4/VST4 variants. 
*) + Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, pf_su_8_32; + Vldx 4, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [P64]; + Vldx 4, [Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [S64; U64]; + Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], + "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; + VecArray (4, Dreg); Immed |], + "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; + VecArray (4, Qreg); Immed |], + "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 4, + [Disassembles_as [Use_operands + [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, pf_su_8_32; + Vldx_dup 4, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [P64]; + Vldx_dup 4, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [S64; U64]; + + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, pf_su_8_32; + Vstx 4, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [P64]; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [S64; U64]; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", + store_1, pf_su_8_32; + + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* Logical operations. And. *) + Vand, [], All (3, Dreg), "vand", notype_2, su_8_32; + Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64]; + Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; + + (* Or. *) + Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32; + Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64]; + Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; + + (* Eor. 
*) + Veor, [], All (3, Dreg), "veor", notype_2, su_8_32; + Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64]; + Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; + + (* Bic (And-not). *) + Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32; + Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64]; + Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; + + (* Or-not. *) + Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32; + Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64]; + Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; + ] + +let type_in_crypto_only t + = (t == P64) or (t == P128) + +let cross_product s1 s2 + = List.filter (fun (e, e') -> e <> e') + (List.concat (List.map (fun e1 -> List.map (fun e2 -> (e1,e2)) s1) s2)) + +let reinterp = + let elems = P8 :: P16 :: F32 :: P64 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Dreg; Dreg |], + "vreinterpret", conv_1, [Cast (convto, convfrom)]) + casts + +let reinterpq = + let elems = P8 :: P16 :: F32 :: P64 :: P128 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Qreg; Qreg |], + "vreinterpretQ", conv_1, [Cast (convto, convfrom)]) + casts + +(* Output routines. *) + +let rec string_of_elt = function + S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64" + | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" + | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" + | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" + | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | P64 -> "p64" | P128 -> "p128" + | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b + | NoElts -> failwith "No elts" + +let string_of_elt_dots elt = + match elt with + Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." 
^ string_of_elt b + | _ -> string_of_elt elt + +let string_of_vectype vt = + let rec name affix = function + T_int8x8 -> affix "int8x8" + | T_int8x16 -> affix "int8x16" + | T_int16x4 -> affix "int16x4" + | T_int16x8 -> affix "int16x8" + | T_int32x2 -> affix "int32x2" + | T_int32x4 -> affix "int32x4" + | T_int64x1 -> affix "int64x1" + | T_int64x2 -> affix "int64x2" + | T_uint8x8 -> affix "uint8x8" + | T_uint8x16 -> affix "uint8x16" + | T_uint16x4 -> affix "uint16x4" + | T_uint16x8 -> affix "uint16x8" + | T_uint32x2 -> affix "uint32x2" + | T_uint32x4 -> affix "uint32x4" + | T_uint64x1 -> affix "uint64x1" + | T_uint64x2 -> affix "uint64x2" + | T_float16x4 -> affix "float16x4" + | T_float32x2 -> affix "float32x2" + | T_float32x4 -> affix "float32x4" + | T_poly8x8 -> affix "poly8x8" + | T_poly8x16 -> affix "poly8x16" + | T_poly16x4 -> affix "poly16x4" + | T_poly16x8 -> affix "poly16x8" + | T_int8 -> affix "int8" + | T_int16 -> affix "int16" + | T_int32 -> affix "int32" + | T_int64 -> affix "int64" + | T_uint8 -> affix "uint8" + | T_uint16 -> affix "uint16" + | T_uint32 -> affix "uint32" + | T_uint64 -> affix "uint64" + | T_poly8 -> affix "poly8" + | T_poly16 -> affix "poly16" + | T_poly64 -> affix "poly64" + | T_poly64x1 -> affix "poly64x1" + | T_poly64x2 -> affix "poly64x2" + | T_poly128 -> affix "poly128" + | T_float16 -> affix "float16" + | T_float32 -> affix "float32" + | T_immediate _ -> "const int" + | T_void -> "void" + | T_intQI -> "__builtin_neon_qi" + | T_intHI -> "__builtin_neon_hi" + | T_intSI -> "__builtin_neon_si" + | T_intDI -> "__builtin_neon_di" + | T_intTI -> "__builtin_neon_ti" + | T_floatHF -> "__builtin_neon_hf" + | T_floatSF -> "__builtin_neon_sf" + | T_arrayof (num, base) -> + let basename = name (fun x -> x) base in + affix (Printf.sprintf "%sx%d" basename num) + | T_ptrto x -> + let basename = name affix x in + Printf.sprintf "%s *" basename + | T_const x -> + let basename = name affix x in + Printf.sprintf "const %s" basename + in + name (fun x -> x ^ "_t") vt + +let string_of_inttype = function + B_TImode -> "__builtin_neon_ti" + | B_EImode -> "__builtin_neon_ei" + | B_OImode -> "__builtin_neon_oi" + | B_CImode -> "__builtin_neon_ci" + | B_XImode -> "__builtin_neon_xi" + +let string_of_mode = function + V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si" + | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" + | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" + | HI -> "hi" | SI -> "si" | SF -> "sf" | TI -> "ti" + +(* Use uppercase chars for letters which form part of the intrinsic name, but + should be omitted from the builtin name (the info is passed in an extra + argument, instead). *) +let intrinsic_name name = String.lowercase name + +(* Allow the name of the builtin to be overridden by things (e.g. Flipped) + found in the features list. *) +let builtin_name features name = + let name = List.fold_right + (fun el name -> + match el with + Flipped x | Builtin_name x -> x + | _ -> name) + features name in + let islower x = let str = String.make 1 x in (String.lowercase str) = str + and buf = Buffer.create (String.length name) in + String.iter (fun c -> if islower c then Buffer.add_char buf c) name; + Buffer.contents buf + +(* Transform an arity into a list of strings. 
*) +let strings_of_arity a = + match a with + | Arity0 vt -> [string_of_vectype vt] + | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2] + | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3] + | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4] + | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4; + string_of_vectype vt5] + +(* Suffixes on the end of builtin names that are to be stripped in order + to obtain the name used as an instruction. They are only stripped if + preceded immediately by an underscore. *) +let suffixes_to_strip = [ "n"; "lane"; "dup" ] + +(* Get the possible names of an instruction corresponding to a "name" from the + ops table. This is done by getting the equivalent builtin name and + stripping any suffixes from the list at the top of this file, unless + the features list presents with an Instruction_name entry, in which + case that is used; or unless the features list presents with a Flipped + entry, in which case that is used. If both such entries are present, + the first in the list will be chosen. *) +let get_insn_names features name = + let names = try + begin + match List.find (fun feature -> match feature with + Instruction_name _ -> true + | Flipped _ -> true + | _ -> false) features + with + Instruction_name names -> names + | Flipped name -> [name] + | _ -> assert false + end + with Not_found -> [builtin_name features name] + in + begin + List.map (fun name' -> + try + let underscore = String.rindex name' '_' in + let our_suffix = String.sub name' (underscore + 1) + ((String.length name') - underscore - 1) + in + let rec strip remaining_suffixes = + match remaining_suffixes with + [] -> name' + | s::ss when our_suffix = s -> String.sub name' 0 underscore + | _::ss -> strip ss + in + strip suffixes_to_strip + with (Not_found | Invalid_argument _) -> name') names + end + +(* Apply a function to each element of a list and then comma-separate + the resulting strings. *) +let rec commas f elts acc = + match elts with + [] -> acc + | [elt] -> acc ^ (f elt) + | elt::elts -> + commas f elts (acc ^ (f elt) ^ ", ") + +(* Given a list of features and the shape specified in the "ops" table, apply + a function to each possible shape that the instruction may have. + By default, this is the "shape" entry in "ops". If the features list + contains a Disassembles_as entry, the shapes contained in that entry are + mapped to corresponding outputs and returned in a list. If there is more + than one Disassembles_as entry, only the first is used. *) +let analyze_all_shapes features shape f = + try + match List.find (fun feature -> + match feature with Disassembles_as _ -> true + | _ -> false) + features with + Disassembles_as shapes -> List.map f shapes + | _ -> assert false + with Not_found -> [f shape] + +(* The crypto intrinsics have unconventional shapes and are not that + numerous to be worth the trouble of encoding here. We implement them + explicitly here. 
*) +let crypto_intrinsics = +" +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) +{ +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif +} + +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of the vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If the result is all zeroes for any half then the whole result is zeroes. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in way similar to vceq_p64 above but here we do + a reduction with max since if any two corresponding bits + in the two poly64_t's match, then the whole result must be all ones. 
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) +{ + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) +{ + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha256su1 (__tw0_3, 
__w8_11, __w12_15); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif +" diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h new file mode 100644 index 000000000..9deda9679 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h @@ -0,0 +1,154 @@ +/* Definitions of target machine for GNU compiler, NetBSD/arm ELF version. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Wasabi Systems, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. */ + +/* arm.h defaults to ARM6 CPU. */ + +/* This defaults us to little-endian. */ +#ifndef TARGET_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +#undef MULTILIB_DEFAULTS + +/* Default it to use ATPCS with soft-VFP. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_APCS_FRAME \ + | TARGET_ENDIAN_DEFAULT) + +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_ATPCS + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } \ + while (0) + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "-matpcs %{fpic|fpie:-k} %{fPIC|fPIE:-k}" + +/* Default to full VFP if -mfloat-abi=hard is specified. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{mfloat-abi=hard:{!mfpu=*:-mfpu=vfp}}" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + +#define NETBSD_ENTRY_POINT "__start" + +#undef LINK_SPEC +#define LINK_SPEC \ + "-X %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + %(netbsd_link_spec)" + +/* Make GCC agree with . */ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* We don't have any limit on the length as out debugger is GDB. */ +#undef DBX_CONTIN_LENGTH + +/* NetBSD does its profiling differently to the Acorn compiler. We + don't need a word following the mcount call; and to skip it + requires either an assembly stub or use of fomit-frame-pointer when + compiling the profiling functions. Since we break Acorn CC + compatibility below a little more won't hurt. 
*/ + +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + asm_fprintf (STREAM, "\tmov\t%Rip, %Rlr\n"); \ + asm_fprintf (STREAM, "\tbl\t__mcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) \ + ? "(PLT)" : ""); \ +} + +/* VERY BIG NOTE: Change of structure alignment for NetBSD/arm. + There are consequences you should be aware of... + + Normally GCC/arm uses a structure alignment of 32 for compatibility + with armcc. This means that structures are padded to a word + boundary. However this causes problems with bugged NetBSD kernel + code (possibly userland code as well - I have not checked every + binary). The nature of this bugged code is to rely on sizeof() + returning the correct size of various structures rounded to the + nearest byte (SCSI and ether code are two examples, the vm system + is another). This code breaks when the structure alignment is 32 + as sizeof() will report a word=rounded size. By changing the + structure alignment to 8. GCC will conform to what is expected by + NetBSD. + + This has several side effects that should be considered. + 1. Structures will only be aligned to the size of the largest member. + i.e. structures containing only bytes will be byte aligned. + structures containing shorts will be half word aligned. + structures containing ints will be word aligned. + + This means structures should be padded to a word boundary if + alignment of 32 is required for byte structures etc. + + 2. A potential performance penalty may exist if strings are no longer + word aligned. GCC will not be able to use word load/stores to copy + short strings. + + This modification is not encouraged but with the present state of the + NetBSD source tree it is currently the only solution that meets the + requirements. */ + +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* Clear the instruction cache from `BEG' to `END'. This makes a + call to the ARM_SYNC_ICACHE architecture specific syscall. */ +#define CLEAR_INSN_CACHE(BEG, END) \ +do \ + { \ + extern int sysarch(int number, void *args); \ + struct \ + { \ + unsigned int addr; \ + int len; \ + } s; \ + s.addr = (unsigned int)(BEG); \ + s.len = (END) - (BEG); \ + (void) sysarch (0, &s); \ + } \ +while (0) + +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + diff --git a/gcc-4.9/gcc/config/arm/predicates.md b/gcc-4.9/gcc/config/arm/predicates.md new file mode 100644 index 000000000..ce5c9a830 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/predicates.md @@ -0,0 +1,677 @@ +;; Predicate definitions for ARM and Thumb +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_predicate "s_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + /* We don't consider registers whose class is NO_REGS + to be a register operand. 
*/ + /* XXX might have to check for lo regs only for thumb ??? */ + return (REG_P (op) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) != NO_REGS)); +}) + +(define_predicate "imm_for_neon_inv_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL)); +}) + +(define_predicate "neon_inv_logic_op2" + (ior (match_operand 0 "imm_for_neon_inv_logic_operand") + (match_operand 0 "s_register_operand"))) + +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + +;; Any general register. +(define_predicate "arm_hard_general_register_operand" + (match_code "reg") +{ + return REGNO (op) <= LAST_ARM_REGNUM; +}) + +;; A low register. +(define_predicate "low_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= LAST_LO_REGNUM"))) + +;; A low register or const_int. +(define_predicate "low_reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "low_register_operand"))) + +;; Any core register, or any pseudo. */ +(define_predicate "arm_general_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (REG_P (op) + && (REGNO (op) <= LAST_ARM_REGNUM + || REGNO (op) >= FIRST_PSEUDO_REGISTER)); +}) + +(define_predicate "vfp_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + /* We don't consider registers whose class is NO_REGS + to be a register operand. */ + return (REG_P (op) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == VFP_D0_D7_REGS + || REGNO_REG_CLASS (REGNO (op)) == VFP_LO_REGS + || (TARGET_VFPD32 + && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS))); +}) + +(define_predicate "vfp_hard_register_operand" + (match_code "reg") +{ + return (IS_VFP_REGNUM (REGNO (op))); +}) + +(define_predicate "zero_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Match a register, or zero in the appropriate mode. +(define_predicate "reg_or_zero_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "zero_operand"))) + +(define_special_predicate "subreg_lowpart_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)"))) + +;; Reg, subreg(reg) or const_int. +(define_predicate "reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "s_register_operand"))) + +(define_predicate "arm_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (INTVAL (op))"))) + +;; A constant value which fits into two instructions, each taking +;; an arithmetic constant operand for one of the words. 
+(define_predicate "arm_immediate_di_operand" + (and (match_code "const_int,const_double") + (match_test "arm_const_double_by_immediates (op)"))) + +(define_predicate "arm_neg_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (-INTVAL (op))"))) + +(define_predicate "arm_not_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (~INTVAL (op))"))) + +(define_predicate "const0_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 0"))) + +;; Something valid on the RHS of an ARM data-processing instruction +(define_predicate "arm_rhs_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_operand"))) + +(define_predicate "arm_rhsm_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + +;; This doesn't have to do much because the constant is already checked +;; in the shift_operator predicate. +(define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + +(define_predicate "const_neon_scalar_shift_amount_operand" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) + && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) + +(define_predicate "ldrd_strd_offset_operand" + (and (match_operand 0 "const_int_operand") + (match_test "TARGET_LDRD && offset_ok_for_ldrd_strd (INTVAL (op))"))) + +(define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_anddi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) + (match_operand 0 "neon_inv_logic_op2"))) + +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + +(define_predicate "arm_adddi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)")))) + +(define_predicate "arm_addimm_operand" + (ior (match_operand 0 "arm_immediate_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_not_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_not_immediate_operand"))) + +(define_predicate "arm_di_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_di_operand"))) + +;; True if the operand is a memory reference which contains an +;; offsettable address. +(define_predicate "offsettable_memory_operand" + (and (match_code "mem") + (match_test + "offsettable_address_p (reload_completed | reload_in_progress, + mode, XEXP (op, 0))"))) + +;; True if the operand is a memory operand that does not have an +;; automodified base register (and thus will not generate output reloads). 
+(define_predicate "call_memory_operand" + (and (match_code "mem") + (and (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) + != RTX_AUTOINC") + (match_operand 0 "memory_operand")))) + +(define_predicate "arm_reload_memory_operand" + (and (match_code "mem,reg,subreg") + (match_test "(!CONSTANT_P (op) + && (true_regnum(op) == -1 + || (REG_P (op) + && REGNO (op) >= FIRST_PSEUDO_REGISTER)))"))) + +(define_predicate "vfp_compare_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_double") + (match_test "arm_const_double_rtx (op)")))) + +(define_predicate "arm_float_compare_operand" + (if_then_else (match_test "TARGET_VFP") + (match_operand 0 "vfp_compare_operand") + (match_operand 0 "s_register_operand"))) + +;; True for valid index operands. +(define_predicate "index_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_operand 0 "immediate_operand") + (match_test "(!CONST_INT_P (op) + || (INTVAL (op) < 4096 && INTVAL (op) > -4096))")))) + +;; True for operators that can be combined with a shift in ARM state. +(define_special_predicate "shiftable_operator" + (and (match_code "plus,minus,ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +(define_special_predicate "shiftable_operator_strict_it" + (and (match_code "plus,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for logical binary operators. +(define_special_predicate "logical_binary_operator" + (and (match_code "ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for commutative operators +(define_special_predicate "commutative_binary_operator" + (and (match_code "ior,xor,and,plus") + (match_test "mode == GET_MODE (op)"))) + +;; True for shift operators. +;; Notes: +;; * mult is only permitted with a constant shift amount +;; * patterns that permit register shift amounts only in ARM mode use +;; shift_amount_operand, patterns that always allow registers do not, +;; so we don't have to worry about that sort of thing here. +(define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") + (match_test "!CONST_INT_P (XEXP (op, 1)) + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + +;; True for shift operators which can be used with saturation instructions. +(define_special_predicate "sat_shift_operator" + (and (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "ashift,ashiftrt") + (match_test "CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1)) < 32)"))) + (match_test "mode == GET_MODE (op)"))) + +;; True for MULT, to identify which variant of shift_operator is in use. +(define_special_predicate "mult_operator" + (match_code "mult")) + +;; True for operators that have 16-bit thumb variants. */ +(define_special_predicate "thumb_16bit_operator" + (match_code "plus,minus,and,ior,xor")) + +;; True for EQ & NE +(define_special_predicate "equality_operator" + (match_code "eq,ne")) + +;; True for integer comparisons and, if FP is active, for comparisons +;; other than LTGT or UNEQ. 
+(define_special_predicate "expandable_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, + unordered,ordered,unlt,unle,unge,ungt")) + +;; Likewise, but only accept comparisons that are directly supported +;; by ARM condition codes. +(define_special_predicate "arm_comparison_operator" + (and (match_operand 0 "expandable_comparison_operator") + (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + +(define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +;; The vsel instruction only accepts the ARM condition codes listed below. +(define_special_predicate "arm_vsel_comparison_operator" + (and (match_operand 0 "expandable_comparison_operator") + (match_test "maybe_get_arm_condition_code (op) == ARM_GE + || maybe_get_arm_condition_code (op) == ARM_GT + || maybe_get_arm_condition_code (op) == ARM_EQ + || maybe_get_arm_condition_code (op) == ARM_VS + || maybe_get_arm_condition_code (op) == ARM_LT + || maybe_get_arm_condition_code (op) == ARM_LE + || maybe_get_arm_condition_code (op) == ARM_NE + || maybe_get_arm_condition_code (op) == ARM_VC"))) + +(define_special_predicate "arm_cond_move_operator" + (if_then_else (match_test "arm_restrict_it") + (and (match_test "TARGET_FPU_ARMV8") + (match_operand 0 "arm_vsel_comparison_operator")) + (match_operand 0 "expandable_comparison_operator"))) + +(define_special_predicate "noov_comparison_operator" + (match_code "lt,ge,eq,ne")) + +(define_special_predicate "minmax_operator" + (and (match_code "smin,smax,umin,umax") + (match_test "mode == GET_MODE (op)"))) + +(define_special_predicate "cc_register" + (and (match_code "reg") + (and (match_test "REGNO (op) == CC_REGNUM") + (ior (match_test "mode == GET_MODE (op)") + (match_test "mode == VOIDmode && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))) + +(define_special_predicate "dominant_cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + + if (GET_MODE_CLASS (mode) != MODE_CC) + return false; + } + + return (cc_register (op, mode) + && (mode == CC_DNEmode + || mode == CC_DEQmode + || mode == CC_DLEmode + || mode == CC_DLTmode + || mode == CC_DGEmode + || mode == CC_DGTmode + || mode == CC_DLEUmode + || mode == CC_DLTUmode + || mode == CC_DGEUmode + || mode == CC_DGTUmode)); +}) + +(define_special_predicate "arm_extendqisi_mem_op" + (and (match_operand 0 "memory_operand") + (match_test "TARGET_ARM ? 
arm_legitimate_address_outer_p (mode, + XEXP (op, 0), + SIGN_EXTEND, + 0) + : memory_address_p (QImode, XEXP (op, 0))"))) + +(define_special_predicate "arm_reg_or_extendqisi_mem_op" + (ior (match_operand 0 "arm_extendqisi_mem_op") + (match_operand 0 "s_register_operand"))) + +(define_predicate "power_of_two_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT value = INTVAL (op) & 0xffffffff; + + return value != 0 && (value & (value - 1)) == 0; +}) + +(define_predicate "nonimmediate_di_operand" + (match_code "reg,subreg,mem") +{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return MEM_P (op) && memory_address_p (DImode, XEXP (op, 0)); +}) + +(define_predicate "di_operand" + (ior (match_code "const_int,const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_di_operand")))) + +(define_predicate "nonimmediate_soft_df_operand" + (match_code "reg,subreg,mem") +{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return MEM_P (op) && memory_address_p (DFmode, XEXP (op, 0)); +}) + +(define_predicate "soft_df_operand" + (ior (match_code "const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_soft_df_operand")))) + +(define_special_predicate "load_multiple_operation" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, SImode, + /*consecutive=*/false, + /*return_pc=*/false); +}) + +(define_special_predicate "store_multiple_operation" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/false, SImode, + /*consecutive=*/false, + /*return_pc=*/false); +}) + +(define_special_predicate "pop_multiple_return" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, SImode, + /*consecutive=*/false, + /*return_pc=*/true); +}) + +(define_special_predicate "pop_multiple_fp" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, DFmode, + /*consecutive=*/true, + /*return_pc=*/false); +}) + +(define_special_predicate "multi_register_push" + (match_code "parallel") +{ + if ((GET_CODE (XVECEXP (op, 0, 0)) != SET) + || (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC) + || (XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPEC_PUSH_MULT)) + return false; + + return true; +}) + +(define_predicate "push_mult_memory_operand" + (match_code "mem") +{ + /* ??? Given how PUSH_MULT is generated in the prologues, is there + any point in testing for thumb1 specially? All of the variants + use the same form. */ + if (TARGET_THUMB1) + { + /* ??? No attempt is made to represent STMIA, or validate that + the stack adjustment matches the register count. This is + true of the ARM/Thumb2 path as well. */ + rtx x = XEXP (op, 0); + if (GET_CODE (x) != PRE_MODIFY) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + x = XEXP (x, 1); + if (GET_CODE (x) != PLUS) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + return CONST_INT_P (XEXP (x, 1)); + } + + /* ARM and Thumb2 handle pre-modify in their legitimate_address. 
*/ + return memory_operand (op, mode); +}) + +;;------------------------------------------------------------------------- +;; +;; Thumb predicates +;; + +(define_predicate "thumb1_cmp_operand" + (ior (and (match_code "reg,subreg") + (match_operand 0 "s_register_operand")) + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 256")))) + +(define_predicate "thumb1_cmpneg_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) < 0 && INTVAL (op) > -256"))) + +;; Return TRUE if a result can be stored in OP without clobbering the +;; condition code register. Prior to reload we only accept a +;; register. After reload we have to be able to handle memory as +;; well, since a pseudo may not get a hard reg and reload cannot +;; handle output-reloads on jump insns. + +;; We could possibly handle mem before reload as well, but that might +;; complicate things with the need to handle increment +;; side-effects. +(define_predicate "thumb_cbrch_target_operand" + (and (match_code "reg,subreg,mem") + (ior (match_operand 0 "s_register_operand") + (and (match_test "reload_in_progress || reload_completed") + (match_operand 0 "memory_operand"))))) + +;;------------------------------------------------------------------------- +;; +;; iWMMXt predicates +;; + +(define_predicate "imm_or_reg_operand" + (ior (match_operand 0 "immediate_operand") + (match_operand 0 "register_operand"))) + +;; Neon predicates + +(define_predicate "const_multiple_of_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return (val & 7) == 0; +}) + +(define_predicate "imm_for_neon_mov_operand" + (match_code "const_vector,const_int") +{ + return neon_immediate_valid_for_move (op, mode, NULL, NULL); +}) + +(define_predicate "imm_for_neon_lshift_operand" + (match_code "const_vector") +{ + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true); +}) + +(define_predicate "imm_for_neon_rshift_operand" + (match_code "const_vector") +{ + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false); +}) + +(define_predicate "imm_lshift_or_reg_neon" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "imm_for_neon_lshift_operand"))) + +(define_predicate "imm_rshift_or_reg_neon" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "imm_for_neon_rshift_operand"))) + +;; Predicates for named expanders that overlap multiple ISAs. + +(define_predicate "cmpdi_operand" + (and (match_test "TARGET_32BIT") + (match_operand 0 "arm_di_operand"))) + +;; True if the operand is memory reference suitable for a ldrex/strex. +(define_predicate "arm_sync_memory_operand" + (and (match_operand 0 "memory_operand") + (match_code "reg" "0"))) + +;; Predicates for parallel expanders based on mode. 
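;; As a rough illustration of the two predicates that follow: for
;; V4SImode (four lanes), vect_par_constant_low accepts a PARALLEL of
;; the const_ints 0 and 1, and vect_par_constant_high a PARALLEL of 2
;; and 3 -- that is, the lane selectors for the low and high halves of
;; the vector, each half holding GET_MODE_NUNITS (mode) / 2 elements.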
+(define_special_predicate "vect_par_constant_high" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (!CONST_INT_P (elt)) + return false; + + val = INTVAL (elt); + if (val != (base/2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_constant_low" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (!CONST_INT_P (elt)) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) + +(define_predicate "const_double_vcvt_power_of_two_reciprocal" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP + && vfp3_const_double_for_fract_bits (op)"))) + +(define_predicate "const_double_vcvt_power_of_two" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP + && vfp3_const_double_for_bits (op)"))) + +(define_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_predicate "neon_permissive_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)"))) + +(define_predicate "neon_perm_struct_or_reg_operand" + (ior (match_operand 0 "neon_permissive_struct_operand") + (match_operand 0 "s_register_operand"))) + +(define_special_predicate "add_operator" + (match_code "plus")) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +(define_predicate "call_insn_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "s_register_operand"))) diff --git a/gcc-4.9/gcc/config/arm/rtems-eabi.h b/gcc-4.9/gcc/config/arm/rtems-eabi.h new file mode 100644 index 000000000..4bdcf0d87 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/rtems-eabi.h @@ -0,0 +1,29 @@ +/* Definitions for RTEMS based ARM systems using EABI. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define HAS_INIT_SECTION + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + TARGET_BPABI_CPP_BUILTINS(); \ + } while (0) diff --git a/gcc-4.9/gcc/config/arm/semi.h b/gcc-4.9/gcc/config/arm/semi.h new file mode 100644 index 000000000..f937e47b9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/semi.h @@ -0,0 +1,68 @@ +/* Definitions of target machine for GNU compiler. 
ARM on semi-hosted platform + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (richard.earnshaw@arm.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define STARTFILE_SPEC "crt0.o%s" + +#ifndef LIB_SPEC +#define LIB_SPEC "-lc" +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__semi__" +#endif + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} -X" +#endif + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD +#endif + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +/* The compiler supports PIC code generation, even though the binutils + may not. If we are asked to compile position independent code, we + always pass -k to the assembler. If it doesn't recognize it, then + it will barf, which probably means that it doesn't know how to + assemble PIC code. This is what we want, since otherwise tools + may incorrectly assume we support PIC compilation even if the + binutils can't. */ +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{fpic|fpie: -k} %{fPIC|fPIE: -k} \ +%{mbig-endian:-EB} \ +%(arm_cpu_spec) \ +%{mapcs-float:-mfloat} \ +%{mfloat-abi=*} %{mfpu=*} \ +%{mthumb-interwork:-mthumb-interwork} \ +%(subtarget_extra_asm_spec)" +#endif diff --git a/gcc-4.9/gcc/config/arm/symbian.h b/gcc-4.9/gcc/config/arm/symbian.h new file mode 100644 index 000000000..777742d6e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/symbian.h @@ -0,0 +1,101 @@ +/* Configuration file for Symbian OS on ARM processors. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Do not expand builtin functions (unless explicitly prefixed with + "__builtin"). Symbian OS code relies on properties of the standard + library that go beyond those guaranteed by the ANSI/ISO standard. + For example, "memcpy" works even with overlapping memory, like + "memmove". 
We cannot simply set flag_no_builtin in arm.c because + (a) flag_no_builtin is not declared in language-independent code, + and (b) that would prevent users from explicitly overriding the + default with -fbuiltin, which may sometimes be useful. + + Make all symbols hidden by default. Symbian OS expects that all + exported symbols will be explicitly marked with + "__declspec(dllexport)". + + Enumeration types use 4 bytes, even if the enumerals are small, + unless explicitly overridden. + + The wchar_t type is a 2-byte type, unless explicitly + overridden. */ +#define CC1_SPEC \ + "%{!fbuiltin:%{!fno-builtin:-fno-builtin}} " \ + "%{!fvisibility=*:-fvisibility=hidden} " \ + "%{!fshort-enums:%{!fno-short-enums:-fno-short-enums}} " \ + "%{!fshort-wchar:%{!fno-short-wchar:-fshort-wchar}} " +#define CC1PLUS_SPEC CC1_SPEC + +/* Symbian OS does not use crt*.o, unlike the generic unknown-elf + configuration. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "" + +/* Do not link with any libraries by default. On Symbian OS, the user + must supply all required libraries on the command line. */ +#undef LIB_SPEC +#define LIB_SPEC "" + +/* Support the "dllimport" attribute. */ +#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1 + +/* Symbian OS assumes ARM V5 or above. Since -march=armv5 is + equivalent to making the ARM 10TDMI core the default, we can set + SUBTARGET_CPU_DEFAULT and get an equivalent effect. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* The assembler should assume VFP FPU format, and armv5t. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{!mfpu=*:-mfpu=vfp} %{!mcpu=*:%{!march=*:-march=armv5t}}" + +/* Define the __symbian__ macro. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + /* Include the default BPABI stuff. */ \ + TARGET_BPABI_CPP_BUILTINS (); \ + /* Symbian OS does not support merging symbols across DLL \ + boundaries. */ \ + builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \ + builtin_define ("__symbian__"); \ + } \ + while (false) + +/* On SymbianOS, these sections are not writable, so we use "a", + rather than "aw", for the section attributes. */ +#undef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"a\",%init_array" +#undef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"a\",%fini_array" + +/* SymbianOS cannot merge entities with vague linkage at runtime. */ +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 + +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_absptr diff --git a/gcc-4.9/gcc/config/arm/sync.md b/gcc-4.9/gcc/config/arm/sync.md new file mode 100644 index 000000000..aa8e9abcf --- /dev/null +++ b/gcc-4.9/gcc/config/arm/sync.md @@ -0,0 +1,472 @@ +;; Machine description for ARM processor synchronization primitives. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) +;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +(define_mode_attr sync_predtab + [(QI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (HI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (SI "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER") + (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN + && TARGET_HAVE_MEMORY_BARRIER")]) + +(define_code_iterator syncop [plus minus ior xor and]) + +(define_code_attr sync_optab + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + +(define_mode_attr sync_sfx + [(QI "b") (HI "h") (SI "") (DI "d")]) + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" + { + if (TARGET_HAVE_DMB) + { + /* Note we issue a system level barrier. We should consider issuing + a inner shareabilty zone barrier here instead, ie. "DMB ISH". */ + /* ??? Differentiate based on SEQ_CST vs less strict? */ + return "dmb\tsy"; + } + + if (TARGET_HAVE_DMB_MCR) + return "mcr\tp15, 0, r0, c7, c10, 5"; + + gcc_unreachable (); + } + [(set_attr "length" "4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + +(define_insn "atomic_load" + [(set (match_operand:QHSI 0 "register_operand" "=r") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_LDA))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE) + return \"ldr\\t%0, %1\"; + else + return \"lda\\t%0, %1\"; + } +) + +(define_insn "atomic_store" + [(set (match_operand:QHSI 0 "memory_operand" "=Q") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "general_operand" "r") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_STL))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE) + return \"str\t%1, %0\"; + else + return \"stl\t%1, %0\"; + } +) + +;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic, +;; even for a 64-bit aligned address. Instead we use a ldrexd unparied +;; with a store. 
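;; As a user-level sketch of what the DImode atomic load patterns below
;; implement (illustrative only; the function name is made up and it
;; assumes an ARMv7-A or later target where LDREXD is available and the
;; location is doubleword aligned):
;;
;;   #include <stdint.h>
;;
;;   uint64_t
;;   load64 (uint64_t *p)
;;   {
;;     /* GCC expands this through atomic_loaddi; the read itself is
;;        done with ldrexd so that it is single-copy atomic, unlike a
;;        plain ldrd, and fences are emitted around it as the requested
;;        memory model demands.  */
;;     return __atomic_load_n (p, __ATOMIC_SEQ_CST);
;;   }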
+(define_expand "atomic_loaddi" + [(match_operand:DI 0 "s_register_operand") ;; val out + (match_operand:DI 1 "mem_noofs_operand") ;; memory + (match_operand:SI 2 "const_int_operand")] ;; model + "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + expand_mem_thread_fence (model); + emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); + if (model == MEMMODEL_SEQ_CST) + expand_mem_thread_fence (model); + DONE; +}) + +(define_insn "atomic_loaddi_1" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + UNSPEC_LL))] + "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" + "ldrexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "s_register_operand" "") ;; bool out + (match_operand:QHSD 1 "s_register_operand" "") ;; val out + (match_operand:QHSD 2 "mem_noofs_operand" "") ;; memory + (match_operand:QHSD 3 "general_operand" "") ;; expected + (match_operand:QHSD 4 "s_register_operand" "") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f + "" +{ + arm_expand_compare_and_swap (operands); + DONE; +}) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (reg:CC_Z CC_REGNUM) ;; bool out + (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SI 0 "s_register_operand" "=&r") ;; val out + (zero_extend:SI + (match_operand:NARROW 1 "mem_noofs_operand" "+Ua"))) ;; memory + (set (match_dup 1) + (unspec_volatile:NARROW + [(match_operand:SI 2 "arm_add_operand" "rIL") ;; expected + (match_operand:NARROW 3 "s_register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_compare_and_swap (operands); + DONE; + }) + +(define_mode_attr cas_cmp_operand + [(SI "arm_add_operand") (DI "cmpdi_operand")]) +(define_mode_attr cas_cmp_str + [(SI "rIL") (DI "rDi")]) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (reg:CC_Z CC_REGNUM) ;; bool out + (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SIDI 0 "s_register_operand" "=&r") ;; val out + (match_operand:SIDI 1 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 1) + (unspec_volatile:SIDI + [(match_operand:SIDI 2 "" "") ;; expect + (match_operand:SIDI 3 "s_register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_compare_and_swap (operands); + DONE; + }) + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") ;; output + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 1) + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r") ;; input + (match_operand:SI 3 "const_int_operand" "")] ;; model + VUNSPEC_ATOMIC_XCHG)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + 
[(const_int 0)] + { + arm_split_atomic_op (SET, operands[0], NULL, operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_mode_attr atomic_op_operand + [(QI "reg_or_int_operand") + (HI "reg_or_int_operand") + (SI "reg_or_int_operand") + (DI "s_register_operand")]) + +(define_mode_attr atomic_op_str + [(QI "rn") (HI "rn") (SI "rn") (DI "r")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 0) + (match_operand:QHSD 1 "" "")) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 0) + (match_operand:QHSD 1 "" ""))) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) + (set (match_dup 1) + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 1) + (match_operand:QHSD 2 "" "")) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) + (set (match_dup 1) + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 1) + (match_operand:QHSD 2 "" ""))) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (syncop:QHSD + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" ""))) + (set (match_dup 1) + (unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (not:QHSD + (and:QHSD + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" "")))) + (set (match_dup 1) + 
(unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_insn "arm_load_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL)))] + "TARGET_HAVE_LDREXBH" + "ldrex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX)))] + "TARGET_HAVE_LDACQ" + "ldaex%?\\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREX" + "ldrex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ" + "ldaex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREXD" + "ldrexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + "ldaexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SC)) + (set (match_operand:QHSD 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r")] + VUNSPEC_SC))] + "" + { + if (mode == DImode) + { + rtx value = operands[2]; + /* The restrictions on target registers in ARM mode are that the two + registers are consecutive and the first one is even; Thumb is + actually more flexible, but DI should give us this anyway. + Note that the 1st register always gets the lowest word in memory. 
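       For instance, with the source value in the register pair r2/r3
       (registers chosen purely for illustration), the template below
       should emit something like "strexd r0, r2, r3, [r4]": the even/odd
       pair carries the low and high words and operand 0 receives the
       store-exclusive status result.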
*/ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "strexd%?\t%0, %2, %3, %C1"; + } + return "strex%?\t%0, %2, %C1"; + } + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusivedi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:DI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:DI + [(match_operand:DI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + { + rtx value = operands[2]; + /* See comment in arm_store_exclusive above. */ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "stlexd%?\t%0, %2, %3, %C1"; + } + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSI + [(match_operand:QHSI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ" + "stlex%?\t%0, %2, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile new file mode 100644 index 000000000..b968711c1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-aprofile @@ -0,0 +1,178 @@ +# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This is a target makefile fragment that attempts to get +# multilibs built for the range of CPU's, FPU's and ABI's that +# are relevant for the A-profile architecture. It should +# not be used in conjunction with another make file fragment and +# assumes --with-arch, --with-cpu, --with-fpu, --with-float, --with-mode +# have their default values during the configure step. We enforce +# this during the top-level configury. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_REUSE = + +# We have the following hierachy: +# ISA: A32 (.) or T32 (thumb) +# Architecture: ARMv7-A (v7-a), ARMv7VE (v7ve), or ARMv8-A (v8-a). +# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), VFPv4-D16 (fpv4), +# NEON-VFPV4 (simdvfpv4), NEON for ARMv8 (simdv8), or None (.). +# Float-abi: Soft (.), softfp (softfp), or hard (hardfp). 
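# As a rough example of the resulting layout (assuming a compiler built
# with this fragment): -mthumb -march=armv8-a -mfpu=neon-fp-armv8
# -mfloat-abi=hard should be served from the multilib directory
# thumb/v8-a/simdv8/hard, and the directory chosen for any option set
# can be checked with gcc -print-multi-directory <options>.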
+ +MULTILIB_OPTIONS += mthumb +MULTILIB_DIRNAMES += thumb + +MULTILIB_OPTIONS += march=armv7-a/march=armv7ve/march=armv8-a +MULTILIB_DIRNAMES += v7-a v7ve v8-a + +MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8 +MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8 + +MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard +MULTILIB_DIRNAMES += softfp hard + +# We don't build no-float libraries with an FPU. +MULTILIB_EXCEPTIONS += *mfpu=vfpv3-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon +MULTILIB_EXCEPTIONS += *mfpu=vfpv4-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon-vfpv4 +MULTILIB_EXCEPTIONS += *mfpu=neon-fp-armv8 + +# We don't build libraries requiring an FPU at the CPU/Arch/ISA level. +MULTILIB_EXCEPTIONS += mfloat-abi=* +MULTILIB_EXCEPTIONS += mfpu=* +MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=* +MULTILIB_EXCEPTIONS += mthumb/mfpu=* +MULTILIB_EXCEPTIONS += *march=armv7-a/mfloat-abi=* +MULTILIB_EXCEPTIONS += *march=armv7ve/mfloat-abi=* +MULTILIB_EXCEPTIONS += *march=armv8-a/mfloat-abi=* + +# Ensure the correct FPU variants apply to the correct base architectures. +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-fp-armv8* +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon-fp-armv8* + +# CPU Matches +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 +MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a15=mcpu?cortex-a12 +MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a15.cortex-a7 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 + +# FPU matches +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16-d16 +MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?vfpv4 +MULTILIB_MATCHES += mfpu?neon-fp-armv8=mfpu?crypto-neon-fp-armv8 + + +# Map all requests for vfpv3 with a later CPU to vfpv3-d16 v7-a. +# So if new CPUs are added above at the newer architecture levels, +# do something to map them below here. +# We take the approach of mapping down to v7-a regardless of what +# the fp option is if the integer architecture brings things down. +# This applies to any similar combination at the v7ve and v8-a arch +# levels. 
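# (Syntax reminder: each MULTILIB_REUSE entry below has the form
# built=request, meaning the multilib built for the left-hand option
# combination is also used for the right-hand one.  Within a
# combination '/' separates options and '.' stands in for the '='
# inside an option, so march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard
# corresponds to -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=hard.)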
+ +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7ve/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7ve/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.softfp + + + +# And again for mthumb. 
+ +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.softfp diff --git a/gcc-4.9/gcc/config/arm/t-arm b/gcc-4.9/gcc/config/arm/t-arm new file mode 100644 
index 000000000..99bd696e4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-arm @@ -0,0 +1,100 @@ +# Rules common to all arm targets +# +# Copyright (C) 2004-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +TM_H += $(srcdir)/config/arm/arm-cores.def +OPTIONS_H_EXTRA += $(srcdir)/config/arm/arm-cores.def + +# All md files - except for arm.md. +# This list should be kept in alphabetical order and updated whenever an md +# file is added or removed. +MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ + $(srcdir)/config/arm/arm1026ejs.md \ + $(srcdir)/config/arm/arm1136jfs.md \ + $(srcdir)/config/arm/arm926ejs.md \ + $(srcdir)/config/arm/arm-fixed.md \ + $(srcdir)/config/arm/arm-generic.md \ + $(srcdir)/config/arm/arm-tune.md \ + $(srcdir)/config/arm/constraints.md \ + $(srcdir)/config/arm/cortex-a15.md \ + $(srcdir)/config/arm/cortex-a5.md \ + $(srcdir)/config/arm/cortex-a7.md \ + $(srcdir)/config/arm/cortex-a8.md \ + $(srcdir)/config/arm/cortex-a8-neon.md \ + $(srcdir)/config/arm/cortex-a9.md \ + $(srcdir)/config/arm/cortex-a9-neon.md \ + $(srcdir)/config/arm/cortex-a53.md \ + $(srcdir)/config/arm/cortex-m4-fpu.md \ + $(srcdir)/config/arm/cortex-m4.md \ + $(srcdir)/config/arm/cortex-r4f.md \ + $(srcdir)/config/arm/cortex-r4.md \ + $(srcdir)/config/arm/fa526.md \ + $(srcdir)/config/arm/fa606te.md \ + $(srcdir)/config/arm/fa626te.md \ + $(srcdir)/config/arm/fa726te.md \ + $(srcdir)/config/arm/fmp626.md \ + $(srcdir)/config/arm/iterators.md \ + $(srcdir)/config/arm/iwmmxt.md \ + $(srcdir)/config/arm/iwmmxt2.md \ + $(srcdir)/config/arm/ldmstm.md \ + $(srcdir)/config/arm/ldrdstrd.md \ + $(srcdir)/config/arm/marvell-f-iwmmxt.md \ + $(srcdir)/config/arm/neon.md \ + $(srcdir)/config/arm/predicates.md \ + $(srcdir)/config/arm/sync.md \ + $(srcdir)/config/arm/thumb2.md \ + $(srcdir)/config/arm/vec-common.md \ + $(srcdir)/config/arm/vfp11.md \ + $(srcdir)/config/arm/vfp.md + +s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) + +$(srcdir)/config/arm/arm-tune.md: $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def + $(SHELL) $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def > \ + $(srcdir)/config/arm/arm-tune.md + +$(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ + $(srcdir)/config/arm/arm-cores.def $(srcdir)/config/arm/arm-arches.def \ + $(srcdir)/config/arm/arm-fpus.def + $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ + $(srcdir)/config/arm/arm-tables.opt + +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + +arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) 
$(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \ + $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \ + $(srcdir)/config/arm/arm_neon_builtins.def + +arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/arm-c.c diff --git a/gcc-4.9/gcc/config/arm/t-arm-elf b/gcc-4.9/gcc/config/arm/t-arm-elf new file mode 100644 index 000000000..8ef6b04ff --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-arm-elf @@ -0,0 +1,90 @@ +# Copyright (C) 1998-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = marm/mthumb +MULTILIB_DIRNAMES = arm thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = + +#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626 + +#MULTILIB_OPTIONS += march=armv7 +#MULTILIB_DIRNAMES += thumb2 +#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7* +#MULTILIB_MATCHES += march?armv7=march?armv7-a +#MULTILIB_MATCHES += march?armv7=march?armv7-r +#MULTILIB_MATCHES += march?armv7=march?armv7-m +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 + +# Not quite true. We can support hard-vfp calling in Thumb2, but how do we +# express that here? Also, we really need architecture v5e or later +# (mcrr etc). 
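# Hence the hard-float multilib added below is built for ARM state
# only: the *mthumb/*mfloat-abi=hard* exception keeps Thumb code out of
# the fpu/ directory until the question above is resolved.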
+MULTILIB_OPTIONS += mfloat-abi=hard +MULTILIB_DIRNAMES += fpu +MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard* + +# MULTILIB_OPTIONS += mcpu=ep9312 +# MULTILIB_DIRNAMES += ep9312 +# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312* +# +# MULTILIB_OPTIONS += mlittle-endian/mbig-endian +# MULTILIB_DIRNAMES += le be +# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle +# +# MULTILIB_OPTIONS += mfloat-abi=hard/mfloat-abi=soft +# MULTILIB_DIRNAMES += fpu soft +# MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* +# +# MULTILIB_OPTIONS += mno-thumb-interwork/mthumb-interwork +# MULTILIB_DIRNAMES += normal interwork +# +# MULTILIB_OPTIONS += fno-leading-underscore/fleading-underscore +# MULTILIB_DIRNAMES += elf under +# +# MULTILIB_OPTIONS += mcpu=arm7 +# MULTILIB_DIRNAMES += nofmult +# MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=arm7* +# # Note: the multilib_exceptions matches both -mthumb and +# # -mthumb-interwork +# # +# # We have to match all the arm cpu variants which do not have the +# # multiply instruction and treat them as if the user had specified +# # -mcpu=arm7. Note that in the following the ? is interpreted as +# # an = for the purposes of matching command line options. +# # FIXME: There ought to be a better way to do this. +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7d +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7di +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm70 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700i +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710c +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7100 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500fe +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm6 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm60 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm600 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm610 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm620 diff --git a/gcc-4.9/gcc/config/arm/t-bpabi b/gcc-4.9/gcc/config/arm/t-bpabi new file mode 100644 index 000000000..ef019ea37 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-bpabi @@ -0,0 +1 @@ +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h diff --git a/gcc-4.9/gcc/config/arm/t-linux-androideabi b/gcc-4.9/gcc/config/arm/t-linux-androideabi new file mode 100644 index 000000000..8f1307c55 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-linux-androideabi @@ -0,0 +1,10 @@ +MULTILIB_OPTIONS = march=armv7-a mthumb +MULTILIB_DIRNAMES = armv7-a thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_OSDIRNAMES = + +# The "special" multilib can be used to build native applications for Android, +# as opposed to native shared libraries that are then called via JNI. +#MULTILIB_OPTIONS += tno-android-cc +#MULTILIB_DIRNAMES += special diff --git a/gcc-4.9/gcc/config/arm/t-linux-eabi b/gcc-4.9/gcc/config/arm/t-linux-eabi new file mode 100644 index 000000000..1087914b5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-linux-eabi @@ -0,0 +1,31 @@ +# Copyright (C) 2005-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We do not build a Thumb multilib for Linux because the definition of +# CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +# If you set MULTILIB_OPTIONS to a non-empty value you should also set +# MULTILIB_DEFAULTS in linux-elf.h. +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = + +#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* + +ARM_EB = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),eb) +MULTIARCH_DIRNAME = $(call if_multiarch,arm$(ARM_EB)-linux-gnueabi$(if $(filter hard,$(with_float)),hf)) diff --git a/gcc-4.9/gcc/config/arm/t-rtems-eabi b/gcc-4.9/gcc/config/arm/t-rtems-eabi new file mode 100644 index 000000000..d81fbf7ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-rtems-eabi @@ -0,0 +1,47 @@ +# Custom RTEMS EABI multilibs + +MULTILIB_OPTIONS = mthumb march=armv6-m/march=armv7-a/march=armv7-r/march=armv7-m mfpu=neon mfloat-abi=hard +MULTILIB_DIRNAMES = thumb armv6-m armv7-a armv7-r armv7-m neon hard + +# Enumeration of multilibs + +MULTILIB_EXCEPTIONS = +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv6-m +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-a +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-r +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-m +MULTILIB_EXCEPTIONS += mthumb/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb +MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv6-m/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv6-m +MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-a/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-a +MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-r/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-r +MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-m/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-m +MULTILIB_EXCEPTIONS += mfpu=neon/mfloat-abi=hard 
+MULTILIB_EXCEPTIONS += mfpu=neon +MULTILIB_EXCEPTIONS += mfloat-abi=hard diff --git a/gcc-4.9/gcc/config/arm/t-symbian b/gcc-4.9/gcc/config/arm/t-symbian new file mode 100644 index 000000000..35ee02889 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-symbian @@ -0,0 +1,26 @@ +# Copyright (C) 2004-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h + +# Create a multilib for processors with VFP floating-point, and a +# multilib for those without -- using the soft-float ABI in both +# cases. Symbian OS object should be compiled with interworking +# enabled, so there are no separate thumb-mode libraries. +MULTILIB_OPTIONS = mfloat-abi=softfp +MULTILIB_DIRNAMES = softfp diff --git a/gcc-4.9/gcc/config/arm/t-vxworks b/gcc-4.9/gcc/config/arm/t-vxworks new file mode 100644 index 000000000..802d8e4bd --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-vxworks @@ -0,0 +1,24 @@ +# Copyright (C) 2003-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = \ + mrtp fPIC \ + t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe +MULTILIB_MATCHES = fPIC=fpic +# Don't build -fPIC multilibs for kernel or Thumb code. +MULTILIB_EXCEPTIONS = fPIC* mrtp/fPIC/*t[45]t* diff --git a/gcc-4.9/gcc/config/arm/thumb2.md b/gcc-4.9/gcc/config/arm/thumb2.md new file mode 100644 index 000000000..d84938f30 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/thumb2.md @@ -0,0 +1,1495 @@ +;; ARM Thumb-2 Machine Description +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
*/ + +;; Note: Thumb-2 is the variant of the Thumb architecture that adds +;; 32-bit encodings of [almost all of] the Arm instruction set. +;; Some old documents refer to the relatively minor interworking +;; changes made in armv5t as "thumb2". These are considered part +;; the 16-bit Thumb-1 instruction set. + +;; Thumb-2 only allows shift by constant on data processing instructions +(define_insn "*thumb_andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "2") + (set_attr "type" "alu_shift_imm")] +) + +;; We use the '0' constraint for operand 1 because reload should +;; be smart enough to generate an appropriate move for the r/r/r case. +(define_insn_and_split "*thumb2_smaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_sminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb32_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr 
"conds" "clob") + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] +) + +;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. +(define_insn_and_split "*thumb2_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=&r,r") + (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" ; negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1 + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (match_dup 3) + (ashift:SI (match_dup 3) + (const_int 1))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + } + DONE; + } + [(set_attr "conds" "*,clob,clob") + (set_attr "shift" "1") + (set_attr "predicable" "yes,no,no") + (set_attr "predicable_short_it" "no") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "ce_count" "2") + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_GT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET 
(VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } + [(set_attr "conds" "*,clob,clob") + (set_attr "shift" "1") + (set_attr "predicable" "yes,no,no") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "predicable_short_it" "no") + (set_attr "ce_count" "2") + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] +) + +;; We have two alternatives here for memory loads (and similarly for stores) +;; to reflect the fact that the permissible constant pool ranges differ +;; between ldr instructions taking low regs and ldr instructions taking high +;; regs. The high register alternatives are not taken into account when +;; choosing register preferences in order to reflect their expense. +(define_insn "*thumb2_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] + "TARGET_THUMB2 && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0 + str%?\\t%1, %0" + [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] +) + +(define_insn "tls_load_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=l,l,r,r") + (mem:SI (unspec:SI [(match_operand:SI 2 "register_operand" "0,1,0,1") + (const_int 4) + (match_operand 3 "" "")] + UNSPEC_PIC_BASE))) + (clobber (match_scratch:SI 1 "=X,l,X,r"))] + "TARGET_THUMB2" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[3])); + return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\"; + " + [(set_attr "length" "4,4,6,6") + (set_attr "type" "multiple")] +) + +;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot +;; of the messiness associated with the ARM patterns. 
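+;; Illustrative examples (editor's sketch, not from the upstream sources;
+;; unified assembly syntax assumed).  The HImode move pattern below can
+;; emit, for instance:
+;;   mov   r0, r1          @ register-to-register move
+;;   movw  r0, #4660       @ 16-bit immediate (0x1234)
+;;   ldrh  r0, [r2]        @ halfword load directly from memory
+;;   strh  r1, [r2]        @ halfword store directly to memory
+;; Because ldrh/strh are always available in Thumb-2, no multi-instruction
+;; HImode load/store sequences are required.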
+(define_insn "*thumb2_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,l,r,m,r") + (match_operand:HI 1 "general_operand" "r,I,Py,n,r,m"))] + "TARGET_THUMB2 + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no") + (set_attr "length" "2,4,2,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,4094") + (set_attr "neg_pool_range" "*,*,*,*,*,250")] +) + +(define_insn "*thumb2_storewb_pairsi" + [(set (match_operand:SI 0 "register_operand" "=&kr") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (mem:SI (plus:SI (match_dup 0) (match_dup 2))) + (match_operand:SI 3 "register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 5 "const_int_operand" "n"))) + (match_operand:SI 4 "register_operand" "r"))] + "TARGET_THUMB2 + && INTVAL (operands[5]) == INTVAL (operands[2]) + 4" + "strd\\t%3, %4, [%0, %2]!" + [(set_attr "type" "store2")] +) + +(define_insn "*thumb2_cmpsi_neg_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r") + (neg:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M")]))))] + "TARGET_THUMB2" + "cmn%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "type" "alus_shift_imm")] +) + +(define_insn_and_split "*thumb2_mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_THUMB2" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" + [(set_attr "conds" "use") + (set_attr "enabled_for_depr_it" "yes,no") + (set_attr "length" "8,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_negscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; ";mvn\\t%0, #0 ;it\\t%D1\;mov%D1\\t%0, #0\" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, 
operands[2], const0_rtx); + + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_notscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; "mvn %0, #0 ; it%d1 ; lsl%d1 %0, %0, #1" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (ashift:SI (match_dup 0) + (const_int 1))))] + { + operands[3] = GEN_INT (~0); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]), + VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") + (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] + "TARGET_THUMB2" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%D3\;mvn%D3\\t%0, #%B2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%d3\;mvn%d3\\t%0, #%B1 + # + # + # + # + #" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 + ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + gen_rtx_REG (mode, CC_REGNUM), + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } + [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") + (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") + (set_attr "conds" "use") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_THUMB2 && TARGET_SOFT_FLOAT" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + 
it\\t%d3\;mov%d3\\t%0, %1" + [(set_attr "length" "6,6") + (set_attr "conds" "use") + (set_attr "type" "multiple")] +) + +(define_insn "*call_reg_thumb2" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb2" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*thumb2_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB2" + "bx\\t%0" + [(set_attr "conds" "clob") + (set_attr "type" "branch")] +) +;; Don't define thumb2_load_indirect_jump because we can't guarantee label +;; addresses will have the thumb bit set correctly. + + +(define_insn_and_split "*thumb2_and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_THUMB2" + "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0" + "&& reload_completed" + [(set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set (attr "length") (if_then_else (match_test "arm_restrict_it") + (const_int 8) + (const_int 10)))] +) + +(define_insn_and_split "*thumb2_ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] + "TARGET_THUMB2 && !arm_restrict_it" + "@ + it\\t%d1\;orr%d1\\t%0, %3, #1 + #" + ; alt 1: ite\\t%D1\;mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "6,10") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_ior_scc_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (ior:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?l")))] + "TARGET_THUMB2 && arm_restrict_it" + "@ + it\\t%d2\;mov%d2\\t%0, #1\;it\\t%d2\;orr%d2\\t%0, %1 + mov\\t%0, #1\;orr\\t%0, %1\;it\\t%D2\;mov%D2\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] 
+) + +(define_insn "*thumb2_cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"it\\t%D4\;mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"it\\t%d4\;mov%d4\\t%0, %1\", operands); + return \"\"; + } + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%d4\", operands); + break; + case 1: + output_asm_insn (\"it\\t%D4\", operands); + break; + case 2: + if (arm_restrict_it) + output_asm_insn (\"it\\t%D4\", operands); + else + output_asm_insn (\"ite\\t%D4\", operands); + break; + default: + abort(); + } + if (which_alternative != 0) + { + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (arm_restrict_it && which_alternative == 2) + output_asm_insn (\"it\\t%d4\", operands); + } + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && !arm_restrict_it" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + } + else if (GET_CODE (operands[5]) == MINUS) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + } + else if (which_alternative != 0) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "14") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_cond_arith_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 5 "shiftable_operator_strict_it" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (match_operand:SI 1 "s_register_operand" "0")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && arm_restrict_it" + "#" + "&& reload_completed" + [(const_int 0)] + { + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + { + /* %i5 %0, %1, %2, lsr #31 */ + rtx shifted_op = gen_rtx_LSHIFTRT (SImode, operands[2], GEN_INT (31)); + rtx op = NULL_RTX; + + switch (GET_CODE (operands[5])) + { + case AND: + op = gen_rtx_AND (SImode, shifted_op, operands[1]); + break; + case PLUS: + op = gen_rtx_PLUS (SImode, shifted_op, operands[1]); + break; + default: gcc_unreachable (); + } + emit_insn (gen_rtx_SET (VOIDmode, operands[0], op)); + DONE; + } + + /* "cmp %2, %3" */ + emit_insn (gen_rtx_SET 
(VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[2], operands[3]))); + + if (GET_CODE (operands[5]) == AND) + { + /* %i5 %0, %1, #1 + it%D4 + mov%D4 %0, #0 */ + enum rtx_code rc = reverse_condition (GET_CODE (operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], gen_rtx_AND (SImode, operands[1], GEN_INT (1)))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + else + { + /* it\\t%d4 + %i5%d4\\t%0, %1, #1 */ + emit_insn (gen_rtx_COND_EXEC (VOIDmode, gen_rtx_fmt_ee (GET_CODE (operands[4]), + VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET(VOIDmode, operands[0], + gen_rtx_PLUS (SImode, + operands[1], + GEN_INT (1))))); + DONE; + } + FAIL; + } + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?Ts") + (match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + { + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%d4\", operands); + } + else + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,14") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit asr\\t%0, %1, #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE && !arm_restrict_it) + { + /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/ + enum rtx_code rc = reverse_condition (GET_CODE (operands[3])); + enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]); + rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0))); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + tmp1, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + FAIL; + } 
+ [(set_attr "conds" "clob") + (set_attr "length" "14") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_movcond" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts,Ts") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,TsI,?TsI") + (match_operand:SI 2 "arm_rhs_operand" "TsI,0,TsI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;it\\tcc\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;it\\tcs\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;it\\tcs\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;it\tcc\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + if (CONST_INT_P (operands[4]) + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%D5\", operands); + break; + case 1: + output_asm_insn (\"it\\t%d5\", operands); + break; + case 2: + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%D5\", operands); + } + else + output_asm_insn (\"ite\\t%d5\", operands); + break; + default: + abort(); + } + if (which_alternative != 0 && !(arm_restrict_it && which_alternative == 2)) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14") + (set_attr "type" "multiple")] +) + +;; Zero and sign extension instructions. + +;; All supported Thumb2 implementations are armv6, so only that case is +;; provided. 
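+;; Illustrative examples (editor's sketch, not from the upstream sources;
+;; unified assembly syntax assumed).  On ARMv6 and later each extension
+;; below maps onto a single instruction, for instance:
+;;   sxtb  r0, r1          @ sign-extend byte to 32 bits
+;;   uxth  r0, r1          @ zero-extend halfword to 32 bits
+;;   ldrsb r0, [r2]        @ sign-extending byte load straight from memory
+;; so the older two-instruction shift-pair sequences are not needed.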
+(define_insn "*thumb2_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "*thumb2_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && !flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +(define_insn "thumb2_casesi_internal_pic" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "20") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_return" + [(simple_return)] + "TARGET_THUMB2" + "* return output_return_instruction (const_true_rtx, true, false, true);" + [(set_attr "type" "branch") + (set_attr "length" "4")] +) + +(define_insn_and_split "thumb2_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_THUMB2" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn "*thumb2_alusi3_short" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 3 "thumb_16bit_operator" + [(match_operand:SI 1 "s_register_operand" "0") + (match_operand:SI 2 "s_register_operand" "l")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && GET_CODE(operands[3]) != PLUS + && GET_CODE(operands[3]) != MINUS" + "%I3%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + 
(set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_shiftsi3_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "low_register_operand" "0,l") + (match_operand:SI 2 "low_reg_or_int_operand" "l,M")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT) + || REG_P (operands[2]))" + "* return arm_output_shift(operands, 2);" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") + (set_attr "length" "2") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*thumb2_mov_shortim" + [(set (match_operand:QHSI 0 "low_register_operand" "=l") + (match_operand:QHSI 1 "const_int_operand" "I")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mov%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "mov_imm")] +) + +(define_insn "*thumb2_addsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (plus:SI (match_operand:SI 1 "low_register_operand" "l,0") + (match_operand:SI 2 "low_reg_or_int_operand" "lPt,Ps"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[2])) + val = INTVAL(operands[2]); + else + val = 0; + + /* We prefer eg. subs rn, rn, #1 over adds rn, rn, #0xffffffff. */ + if (val < 0 && const_ok_for_arm(ARM_SIGN_EXTEND (-val))) + return \"sub%!\\t%0, %1, #%n2\"; + else + return \"add%!\\t%0, %1, %2\"; + " + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_subsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (minus:SI (match_operand:SI 1 "low_register_operand" "l") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "sub%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_peephole2 + [(set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && peep2_reg_dead_p (1, operands[1]) + && satisfies_constraint_Pw (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])] + "operands[3] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_peephole2 + [(match_scratch:SI 3 "l") + (set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && satisfies_constraint_Px (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])] + "operands[4] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_insn "thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") + (match_operand:SI 2 "arm_add_operand" "lPt,Ps,rIL")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[2])) + val = INTVAL (operands[2]); + else + val = 0; + + if (val < 0 && 
const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"subs\\t%0, %1, #%n2\"; + else + return \"adds\\t%0, %1, %2\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) + (const_int 0)))] + "TARGET_THUMB2" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[1])) + val = INTVAL (operands[1]); + else + val = 0; + + if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"cmp\\t%0, #%n1\"; + else + return \"cmn\\t%0, %1\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_reg")] +) + +(define_insn "*thumb2_mulsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (mult:SI (match_operand:SI 1 "low_register_operand" "%0") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && optimize_size && reload_completed" + "mul%!\\t%0, %2, %0" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=l") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=l"))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_cbz" + [(set (pc) (if_then_else + (eq (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;beq\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (not (match_test "which_alternative"))) + (const_int 2) + (const_int 8))) + (set_attr "type" "branch,multiple")] +) + +(define_insn "*thumb2_cbnz" + [(set (pc) (if_then_else + (ne (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbnz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;bne\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (not (match_test "which_alternative"))) + (const_int 2) + (const_int 8))) + (set_attr "type" "branch,multiple")] +) + +(define_insn "*thumb2_one_cmplsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mvn%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" 
"2") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*thumb2_negsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "neg%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "*orsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "*orsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "2") + (set_attr "type" "alu_shift_imm")] +) + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (31 - INTVAL (operands[2])); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? LT : GE, + VOIDmode, operands[0], const0_rtx); + ") + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (const_int 0)) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + ") + +;; Define the subtract-one-and-jump insns so loop.c +;; knows what to generate. +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" ""))] ; label + "TARGET_32BIT" + " + { + /* Currently SMS relies on the do-loop pattern to recognize loops + where (1) the control part consists of all insns defining and/or + using a certain 'count' register and (2) the loop count can be + adjusted by modifying this register prior to the loop. + ??? The possible introduction of a new block to initialize the + new IV can potentially affect branch optimizations. 
*/ + if (optimize > 0 && flag_modulo_sched) + { + rtx s0; + rtx bcomp; + rtx loc_ref; + rtx cc_reg; + rtx insn; + rtx cmp; + + if (GET_MODE (operands[0]) != SImode) + FAIL; + + s0 = operands [0]; + if (TARGET_THUMB2) + insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); + else + insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); + + cmp = XVECEXP (PATTERN (insn), 0, 0); + cc_reg = SET_DEST (cmp); + bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, + loc_ref, pc_rtx))); + DONE; + }else + FAIL; + }") + diff --git a/gcc-4.9/gcc/config/arm/types.md b/gcc-4.9/gcc/config/arm/types.md new file mode 100644 index 000000000..cc39cd11f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/types.md @@ -0,0 +1,1077 @@ +;; Instruction Classification for ARM for GNU compiler. + +;; Copyright (C) 1991-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; TYPE attribute is used to classify instructions for use in scheduling. +; +; Instruction classification: +; +; adc_imm add/subtract with carry and with an immediate operand. +; adc_reg add/subtract with carry and no immediate operand. +; adcs_imm as adc_imm, setting condition flags. +; adcs_reg as adc_reg, setting condition flags. +; adr calculate address. +; alu_ext From ARMv8-A: any arithmetic instruction that has a +; sign/zero-extended. +; AArch64 Only. +; source operand +; alu_imm any arithmetic instruction that doesn't have a shifted +; operand and has an immediate operand. This +; excludes MOV, MVN and RSB(S) immediate. +; alu_reg any arithmetic instruction that doesn't have a shifted +; or an immediate operand. This excludes +; MOV and MVN but includes MOVT. This is also the default. +; alu_shift_imm any arithmetic instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; alu_shift_reg as alu_shift_imm, with the shift amount specified in a +; register. +; alus_ext From ARMv8-A: as alu_ext, setting condition flags. +; AArch64 Only. +; alus_imm as alu_imm, setting condition flags. +; alus_reg as alu_reg, setting condition flags. +; alus_shift_imm as alu_shift_imm, setting condition flags. +; alus_shift_reg as alu_shift_reg, setting condition flags. +; bfm bitfield move operation. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; clz count leading zeros (CLZ). +; csel From ARMv8-A: conditional select. +; extend extend instruction (SXTB, SXTH, UXTB, UXTH). +; f_cvt conversion between float representations. +; f_cvtf2i conversion between float and integral types. +; f_cvti2f conversion between integral and float types. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. 
+; f_mcr transfer arm to vfp reg. +; f_mcrr transfer two arm regs to vfp reg. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_mrc transfer vfp to arm reg. +; f_mrrc transfer vfp to two arm regs. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. +; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcsel From ARMv8-A: Floating-point conditional select. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmov floating point to floating point register move. +; fmul[d,s] double/single floating point multiply. +; fsqrt[d,s] double/single precision floating point square root. +; load_acq load-acquire. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; logic_imm any logical instruction that doesn't have a shifted +; operand and has an immediate operand. +; logic_reg any logical instruction that doesn't have a shifted +; operand or an immediate operand. +; logic_shift_imm any logical instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; logic_shift_reg as logic_shift_imm, with the shift amount specified in a +; register. +; logics_imm as logic_imm, setting condition flags. +; logics_reg as logic_reg, setting condition flags. +; logics_shift_imm as logic_shift_imm, setting condition flags. +; logics_shift_reg as logic_shift_reg, setting condition flags. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov_imm simple MOV instruction that moves an immediate to +; register. This includes MOVW, but not MOVT. +; mov_reg simple MOV instruction that moves a register to another +; register. This includes MOVW, but not MOVT. +; mov_shift simple MOV instruction, shifted operand by a constant. +; mov_shift_reg simple MOV instruction, shifted operand by a register. +; mrs system/special/co-processor register move. +; mul integer multiply. +; muls integer multiply, flag setting. +; multiple more than one instruction, candidate for future +; splitting, or better modeling. +; mvn_imm inverting move instruction, immediate. +; mvn_reg inverting move instruction, register. +; mvn_shift inverting move instruction, shifted operand by a constant. +; mvn_shift_reg inverting move instruction, shifted operand by a register. +; no_insn an insn which does not represent an instruction in the +; final output, thus having no impact on scheduling. +; rbit reverse bits. +; rev reverse bytes. +; sdiv signed division. +; shift_imm simple shift operation (LSL, LSR, ASR, ROR) with an +; immediate. +; shift_reg simple shift by a register. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. 
+; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store_rel store-release. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; trap cause a trap in the kernel. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. +; untyped insn without type information - default, and error, +; case. +; +; The classification below is for instructions used by the Wireless MMX +; Technology. Each attribute value is used to classify an instruction of the +; same name or family. +; +; wmmx_tandc +; wmmx_tbcst +; wmmx_textrc +; wmmx_textrm +; wmmx_tinsr +; wmmx_tmcr +; wmmx_tmcrr +; wmmx_tmia +; wmmx_tmiaph +; wmmx_tmiaxy +; wmmx_tmrc +; wmmx_tmrrc +; wmmx_tmovmsk +; wmmx_torc +; wmmx_torvsc +; wmmx_wabs +; wmmx_wdiff +; wmmx_wacc +; wmmx_wadd +; wmmx_waddbhus +; wmmx_waddsubhx +; wmmx_waligni +; wmmx_walignr +; wmmx_wand +; wmmx_wandn +; wmmx_wavg2 +; wmmx_wavg4 +; wmmx_wcmpeq +; wmmx_wcmpgt +; wmmx_wmac +; wmmx_wmadd +; wmmx_wmax +; wmmx_wmerge +; wmmx_wmiawxy +; wmmx_wmiaxy +; wmmx_wmin +; wmmx_wmov +; wmmx_wmul +; wmmx_wmulw +; wmmx_wldr +; wmmx_wor +; wmmx_wpack +; wmmx_wqmiaxy +; wmmx_wqmulm +; wmmx_wqmulwm +; wmmx_wror +; wmmx_wsad +; wmmx_wshufh +; wmmx_wsll +; wmmx_wsra +; wmmx_wsrl +; wmmx_wstr +; wmmx_wsub +; wmmx_wsubaddhx +; wmmx_wunpckeh +; wmmx_wunpckel +; wmmx_wunpckih +; wmmx_wunpckil +; wmmx_wxor +; +; The classification below is for NEON instructions. 
+; +; neon_add +; neon_add_q +; neon_add_widen +; neon_add_long +; neon_qadd +; neon_qadd_q +; neon_add_halve +; neon_add_halve_q +; neon_add_halve_narrow_q +; neon_sub +; neon_sub_q +; neon_sub_widen +; neon_sub_long +; neon_qsub +; neon_qsub_q +; neon_sub_halve +; neon_sub_halve_q +; neon_sub_halve_narrow_q +; neon_abs +; neon_abs_q +; neon_neg +; neon_neg_q +; neon_qneg +; neon_qneg_q +; neon_qabs +; neon_qabs_q +; neon_abd +; neon_abd_q +; neon_abd_long +; neon_minmax +; neon_minmax_q +; neon_compare +; neon_compare_q +; neon_compare_zero +; neon_compare_zero_q +; neon_arith_acc +; neon_arith_acc_q +; neon_reduc_add +; neon_reduc_add_q +; neon_reduc_add_long +; neon_reduc_add_acc +; neon_reduc_add_acc_q +; neon_reduc_minmax +; neon_reduc_minmax_q +; neon_logic +; neon_logic_q +; neon_tst +; neon_tst_q +; neon_shift_imm +; neon_shift_imm_q +; neon_shift_imm_narrow_q +; neon_shift_imm_long +; neon_shift_reg +; neon_shift_reg_q +; neon_shift_acc +; neon_shift_acc_q +; neon_sat_shift_imm +; neon_sat_shift_imm_q +; neon_sat_shift_imm_narrow_q +; neon_sat_shift_reg +; neon_sat_shift_reg_q +; neon_ins +; neon_ins_q +; neon_move +; neon_move_q +; neon_move_narrow_q +; neon_permute +; neon_permute_q +; neon_zip +; neon_zip_q +; neon_tbl1 +; neon_tbl1_q +; neon_tbl2 +; neon_tbl2_q +; neon_tbl3 +; neon_tbl3_q +; neon_tbl4 +; neon_tbl4_q +; neon_bsl +; neon_bsl_q +; neon_cls +; neon_cls_q +; neon_cnt +; neon_cnt_q +; neon_ext +; neon_ext_q +; neon_rbit +; neon_rbit_q +; neon_rev +; neon_rev_q +; neon_mul_b +; neon_mul_b_q +; neon_mul_h +; neon_mul_h_q +; neon_mul_s +; neon_mul_s_q +; neon_mul_b_long +; neon_mul_h_long +; neon_mul_s_long +; neon_mul_d_long +; neon_mul_h_scalar +; neon_mul_h_scalar_q +; neon_mul_s_scalar +; neon_mul_s_scalar_q +; neon_mul_h_scalar_long +; neon_mul_s_scalar_long +; neon_sat_mul_b +; neon_sat_mul_b_q +; neon_sat_mul_h +; neon_sat_mul_h_q +; neon_sat_mul_s +; neon_sat_mul_s_q +; neon_sat_mul_b_long +; neon_sat_mul_h_long +; neon_sat_mul_s_long +; neon_sat_mul_h_scalar +; neon_sat_mul_h_scalar_q +; neon_sat_mul_s_scalar +; neon_sat_mul_s_scalar_q +; neon_sat_mul_h_scalar_long +; neon_sat_mul_s_scalar_long +; neon_mla_b +; neon_mla_b_q +; neon_mla_h +; neon_mla_h_q +; neon_mla_s +; neon_mla_s_q +; neon_mla_b_long +; neon_mla_h_long +; neon_mla_s_long +; neon_mla_h_scalar +; neon_mla_h_scalar_q +; neon_mla_s_scalar +; neon_mla_s_scalar_q +; neon_mla_h_scalar_long +; neon_mla_s_scalar_long +; neon_sat_mla_b_long +; neon_sat_mla_h_long +; neon_sat_mla_s_long +; neon_sat_mla_h_scalar_long +; neon_sat_mla_s_scalar_long +; neon_to_gp +; neon_to_gp_q +; neon_from_gp +; neon_from_gp_q +; neon_ldr +; neon_load1_1reg +; neon_load1_1reg_q +; neon_load1_2reg +; neon_load1_2reg_q +; neon_load1_3reg +; neon_load1_3reg_q +; neon_load1_4reg +; neon_load1_4reg_q +; neon_load1_all_lanes +; neon_load1_all_lanes_q +; neon_load1_one_lane +; neon_load1_one_lane_q +; neon_load2_2reg +; neon_load2_2reg_q +; neon_load2_4reg +; neon_load2_4reg_q +; neon_load2_all_lanes +; neon_load2_all_lanes_q +; neon_load2_one_lane +; neon_load2_one_lane_q +; neon_load3_3reg +; neon_load3_3reg_q +; neon_load3_all_lanes +; neon_load3_all_lanes_q +; neon_load3_one_lane +; neon_load3_one_lane_q +; neon_load4_4reg +; neon_load4_4reg_q +; neon_load4_all_lanes +; neon_load4_all_lanes_q +; neon_load4_one_lane +; neon_load4_one_lane_q +; neon_str +; neon_store1_1reg +; neon_store1_1reg_q +; neon_store1_2reg +; neon_store1_2reg_q +; neon_store1_3reg +; neon_store1_3reg_q +; neon_store1_4reg +; neon_store1_4reg_q +; 
neon_store1_one_lane +; neon_store1_one_lane_q +; neon_store2_2reg +; neon_store2_2reg_q +; neon_store2_4reg +; neon_store2_4reg_q +; neon_store2_one_lane +; neon_store2_one_lane_q +; neon_store3_3reg +; neon_store3_3reg_q +; neon_store3_one_lane +; neon_store3_one_lane_q +; neon_store4_4reg +; neon_store4_4reg_q +; neon_store4_one_lane +; neon_store4_one_lane_q +; neon_fp_abs_s +; neon_fp_abs_s_q +; neon_fp_abs_d +; neon_fp_abs_d_q +; neon_fp_neg_s +; neon_fp_neg_s_q +; neon_fp_neg_d +; neon_fp_neg_d_q +; neon_fp_abd_s +; neon_fp_abd_s_q +; neon_fp_abd_d +; neon_fp_abd_d_q +; neon_fp_addsub_s +; neon_fp_addsub_s_q +; neon_fp_addsub_d +; neon_fp_addsub_d_q +; neon_fp_compare_s +; neon_fp_compare_s_q +; neon_fp_compare_d +; neon_fp_compare_d_q +; neon_fp_minmax_s +; neon_fp_minmax_s_q +; neon_fp_minmax_d +; neon_fp_minmax_d_q +; neon_fp_reduc_add_s +; neon_fp_reduc_add_s_q +; neon_fp_reduc_add_d +; neon_fp_reduc_add_d_q +; neon_fp_reduc_minmax_s +; neon_fp_reduc_minmax_s_q +; neon_fp_reduc_minmax_d +; neon_fp_reduc_minmax_d_q +; neon_fp_cvt_narrow_s_q +; neon_fp_cvt_narrow_d_q +; neon_fp_cvt_widen_h +; neon_fp_cvt_widen_s +; neon_fp_to_int_s +; neon_fp_to_int_s_q +; neon_fp_to_int_d +; neon_fp_to_int_d_q +; neon_int_to_fp_s +; neon_int_to_fp_s_q +; neon_int_to_fp_d +; neon_int_to_fp_d_q +; neon_fp_round_s +; neon_fp_round_s_q +; neon_fp_round_d +; neon_fp_round_d_q +; neon_fp_recpe_s +; neon_fp_recpe_s_q +; neon_fp_recpe_d +; neon_fp_recpe_d_q +; neon_fp_recps_s +; neon_fp_recps_s_q +; neon_fp_recps_d +; neon_fp_recps_d_q +; neon_fp_recpx_s +; neon_fp_recpx_s_q +; neon_fp_recpx_d +; neon_fp_recpx_d_q +; neon_fp_rsqrte_s +; neon_fp_rsqrte_s_q +; neon_fp_rsqrte_d +; neon_fp_rsqrte_d_q +; neon_fp_rsqrts_s +; neon_fp_rsqrts_s_q +; neon_fp_rsqrts_d +; neon_fp_rsqrts_d_q +; neon_fp_mul_s +; neon_fp_mul_s_q +; neon_fp_mul_s_scalar +; neon_fp_mul_s_scalar_q +; neon_fp_mul_d +; neon_fp_mul_d_q +; neon_fp_mul_d_scalar_q +; neon_fp_mla_s +; neon_fp_mla_s_q +; neon_fp_mla_s_scalar +; neon_fp_mla_s_scalar_q +; neon_fp_mla_d +; neon_fp_mla_d_q +; neon_fp_mla_d_scalar_q +; neon_fp_sqrt_s +; neon_fp_sqrt_s_q +; neon_fp_sqrt_d +; neon_fp_sqrt_d_q +; neon_fp_div_s +; neon_fp_div_s_q +; neon_fp_div_d +; neon_fp_div_d_q +; +; The classification below is for Crypto instructions. 
+; +; crypto_aes +; crypto_sha1_xor +; crypto_sha1_fast +; crypto_sha1_slow +; crypto_sha256_fast +; crypto_sha256_slow + +(define_attr "type" + "adc_imm,\ + adc_reg,\ + adcs_imm,\ + adcs_reg,\ + adr,\ + alu_ext,\ + alu_imm,\ + alu_reg,\ + alu_shift_imm,\ + alu_shift_reg,\ + alus_ext,\ + alus_imm,\ + alus_reg,\ + alus_shift_imm,\ + alus_shift_reg,\ + bfm,\ + block,\ + branch,\ + call,\ + clz,\ + no_insn,\ + csel,\ + crc,\ + extend,\ + f_cvt,\ + f_cvtf2i,\ + f_cvti2f,\ + f_flag,\ + f_loadd,\ + f_loads,\ + f_mcr,\ + f_mcrr,\ + f_minmaxd,\ + f_minmaxs,\ + f_mrc,\ + f_mrrc,\ + f_rintd,\ + f_rints,\ + f_seld,\ + f_sels,\ + f_stored,\ + f_stores,\ + faddd,\ + fadds,\ + fcmpd,\ + fcmps,\ + fconstd,\ + fconsts,\ + fcsel,\ + fdivd,\ + fdivs,\ + ffarithd,\ + ffariths,\ + ffmad,\ + ffmas,\ + float,\ + fmacd,\ + fmacs,\ + fmov,\ + fmuld,\ + fmuls,\ + fsqrts,\ + fsqrtd,\ + load_acq,\ + load_byte,\ + load1,\ + load2,\ + load3,\ + load4,\ + logic_imm,\ + logic_reg,\ + logic_shift_imm,\ + logic_shift_reg,\ + logics_imm,\ + logics_reg,\ + logics_shift_imm,\ + logics_shift_reg,\ + mla,\ + mlas,\ + mov_imm,\ + mov_reg,\ + mov_shift,\ + mov_shift_reg,\ + mrs,\ + mul,\ + muls,\ + multiple,\ + mvn_imm,\ + mvn_reg,\ + mvn_shift,\ + mvn_shift_reg,\ + nop,\ + rbit,\ + rev,\ + sdiv,\ + shift_imm,\ + shift_reg,\ + smlad,\ + smladx,\ + smlal,\ + smlald,\ + smlals,\ + smlalxy,\ + smlawx,\ + smlawy,\ + smlaxy,\ + smlsd,\ + smlsdx,\ + smlsld,\ + smmla,\ + smmul,\ + smmulr,\ + smuad,\ + smuadx,\ + smull,\ + smulls,\ + smulwy,\ + smulxy,\ + smusd,\ + smusdx,\ + store_rel,\ + store1,\ + store2,\ + store3,\ + store4,\ + trap,\ + udiv,\ + umaal,\ + umlal,\ + umlals,\ + umull,\ + umulls,\ + untyped,\ + wmmx_tandc,\ + wmmx_tbcst,\ + wmmx_textrc,\ + wmmx_textrm,\ + wmmx_tinsr,\ + wmmx_tmcr,\ + wmmx_tmcrr,\ + wmmx_tmia,\ + wmmx_tmiaph,\ + wmmx_tmiaxy,\ + wmmx_tmrc,\ + wmmx_tmrrc,\ + wmmx_tmovmsk,\ + wmmx_torc,\ + wmmx_torvsc,\ + wmmx_wabs,\ + wmmx_wabsdiff,\ + wmmx_wacc,\ + wmmx_wadd,\ + wmmx_waddbhus,\ + wmmx_waddsubhx,\ + wmmx_waligni,\ + wmmx_walignr,\ + wmmx_wand,\ + wmmx_wandn,\ + wmmx_wavg2,\ + wmmx_wavg4,\ + wmmx_wcmpeq,\ + wmmx_wcmpgt,\ + wmmx_wmac,\ + wmmx_wmadd,\ + wmmx_wmax,\ + wmmx_wmerge,\ + wmmx_wmiawxy,\ + wmmx_wmiaxy,\ + wmmx_wmin,\ + wmmx_wmov,\ + wmmx_wmul,\ + wmmx_wmulw,\ + wmmx_wldr,\ + wmmx_wor,\ + wmmx_wpack,\ + wmmx_wqmiaxy,\ + wmmx_wqmulm,\ + wmmx_wqmulwm,\ + wmmx_wror,\ + wmmx_wsad,\ + wmmx_wshufh,\ + wmmx_wsll,\ + wmmx_wsra,\ + wmmx_wsrl,\ + wmmx_wstr,\ + wmmx_wsub,\ + wmmx_wsubaddhx,\ + wmmx_wunpckeh,\ + wmmx_wunpckel,\ + wmmx_wunpckih,\ + wmmx_wunpckil,\ + wmmx_wxor,\ +\ + neon_add,\ + neon_add_q,\ + neon_add_widen,\ + neon_add_long,\ + neon_qadd,\ + neon_qadd_q,\ + neon_add_halve,\ + neon_add_halve_q,\ + neon_add_halve_narrow_q,\ +\ + neon_sub,\ + neon_sub_q,\ + neon_sub_widen,\ + neon_sub_long,\ + neon_qsub,\ + neon_qsub_q,\ + neon_sub_halve,\ + neon_sub_halve_q,\ + neon_sub_halve_narrow_q,\ +\ + neon_abs,\ + neon_abs_q,\ + neon_neg,\ + neon_neg_q,\ + neon_qneg,\ + neon_qneg_q,\ + neon_qabs,\ + neon_qabs_q,\ + neon_abd,\ + neon_abd_q,\ + neon_abd_long,\ +\ + neon_minmax,\ + neon_minmax_q,\ + neon_compare,\ + neon_compare_q,\ + neon_compare_zero,\ + neon_compare_zero_q,\ +\ + neon_arith_acc,\ + neon_arith_acc_q,\ + neon_reduc_add,\ + neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_reduc_add_acc,\ + neon_reduc_add_acc_q,\ + neon_reduc_minmax,\ + neon_reduc_minmax_q,\ + neon_logic,\ + neon_logic_q,\ + neon_tst,\ + neon_tst_q,\ +\ + neon_shift_imm,\ + neon_shift_imm_q,\ + neon_shift_imm_narrow_q,\ + 
neon_shift_imm_long,\ + neon_shift_reg,\ + neon_shift_reg_q,\ + neon_shift_acc,\ + neon_shift_acc_q,\ + neon_sat_shift_imm,\ + neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg,\ + neon_sat_shift_reg_q,\ +\ + neon_ins,\ + neon_ins_q,\ + neon_move,\ + neon_move_q,\ + neon_move_narrow_q,\ + neon_permute,\ + neon_permute_q,\ + neon_zip,\ + neon_zip_q,\ + neon_tbl1,\ + neon_tbl1_q,\ + neon_tbl2,\ + neon_tbl2_q,\ + neon_tbl3,\ + neon_tbl3_q,\ + neon_tbl4,\ + neon_tbl4_q,\ +\ + neon_bsl,\ + neon_bsl_q,\ + neon_cls,\ + neon_cls_q,\ + neon_cnt,\ + neon_cnt_q,\ + neon_dup,\ + neon_dup_q,\ + neon_ext,\ + neon_ext_q,\ + neon_rbit,\ + neon_rbit_q,\ + neon_rev,\ + neon_rev_q,\ +\ + neon_mul_b,\ + neon_mul_b_q,\ + neon_mul_h,\ + neon_mul_h_q,\ + neon_mul_s,\ + neon_mul_s_q,\ + neon_mul_b_long,\ + neon_mul_h_long,\ + neon_mul_s_long,\ + neon_mul_d_long,\ + neon_mul_h_scalar,\ + neon_mul_h_scalar_q,\ + neon_mul_s_scalar,\ + neon_mul_s_scalar_q,\ + neon_mul_h_scalar_long,\ + neon_mul_s_scalar_long,\ +\ + neon_sat_mul_b,\ + neon_sat_mul_b_q,\ + neon_sat_mul_h,\ + neon_sat_mul_h_q,\ + neon_sat_mul_s,\ + neon_sat_mul_s_q,\ + neon_sat_mul_b_long,\ + neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar,\ + neon_sat_mul_s_scalar_q,\ + neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ +\ + neon_mla_b,\ + neon_mla_b_q,\ + neon_mla_h,\ + neon_mla_h_q,\ + neon_mla_s,\ + neon_mla_s_q,\ + neon_mla_b_long,\ + neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar,\ + neon_mla_h_scalar_q,\ + neon_mla_s_scalar,\ + neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long,\ + neon_mla_s_scalar_long,\ +\ + neon_sat_mla_b_long,\ + neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ +\ + neon_to_gp,\ + neon_to_gp_q,\ + neon_from_gp,\ + neon_from_gp_q,\ +\ + neon_ldr,\ + neon_load1_1reg,\ + neon_load1_1reg_q,\ + neon_load1_2reg,\ + neon_load1_2reg_q,\ + neon_load1_3reg,\ + neon_load1_3reg_q,\ + neon_load1_4reg,\ + neon_load1_4reg_q,\ + neon_load1_all_lanes,\ + neon_load1_all_lanes_q,\ + neon_load1_one_lane,\ + neon_load1_one_lane_q,\ +\ + neon_load2_2reg,\ + neon_load2_2reg_q,\ + neon_load2_4reg,\ + neon_load2_4reg_q,\ + neon_load2_all_lanes,\ + neon_load2_all_lanes_q,\ + neon_load2_one_lane,\ + neon_load2_one_lane_q,\ +\ + neon_load3_3reg,\ + neon_load3_3reg_q,\ + neon_load3_all_lanes,\ + neon_load3_all_lanes_q,\ + neon_load3_one_lane,\ + neon_load3_one_lane_q,\ +\ + neon_load4_4reg,\ + neon_load4_4reg_q,\ + neon_load4_all_lanes,\ + neon_load4_all_lanes_q,\ + neon_load4_one_lane,\ + neon_load4_one_lane_q,\ +\ + neon_str,\ + neon_store1_1reg,\ + neon_store1_1reg_q,\ + neon_store1_2reg,\ + neon_store1_2reg_q,\ + neon_store1_3reg,\ + neon_store1_3reg_q,\ + neon_store1_4reg,\ + neon_store1_4reg_q,\ + neon_store1_one_lane,\ + neon_store1_one_lane_q,\ +\ + neon_store2_2reg,\ + neon_store2_2reg_q,\ + neon_store2_4reg,\ + neon_store2_4reg_q,\ + neon_store2_one_lane,\ + neon_store2_one_lane_q,\ +\ + neon_store3_3reg,\ + neon_store3_3reg_q,\ + neon_store3_one_lane,\ + neon_store3_one_lane_q,\ +\ + neon_store4_4reg,\ + neon_store4_4reg_q,\ + neon_store4_one_lane,\ + neon_store4_one_lane_q,\ +\ + neon_fp_abs_s,\ + neon_fp_abs_s_q,\ + neon_fp_abs_d,\ + neon_fp_abs_d_q,\ + neon_fp_neg_s,\ + neon_fp_neg_s_q,\ + neon_fp_neg_d,\ + neon_fp_neg_d_q,\ +\ + neon_fp_abd_s,\ + neon_fp_abd_s_q,\ + neon_fp_abd_d,\ + neon_fp_abd_d_q,\ + neon_fp_addsub_s,\ + neon_fp_addsub_s_q,\ + neon_fp_addsub_d,\ 
+ neon_fp_addsub_d_q,\ + neon_fp_compare_s,\ + neon_fp_compare_s_q,\ + neon_fp_compare_d,\ + neon_fp_compare_d_q,\ + neon_fp_minmax_s,\ + neon_fp_minmax_s_q,\ + neon_fp_minmax_d,\ + neon_fp_minmax_d_q,\ +\ + neon_fp_reduc_add_s,\ + neon_fp_reduc_add_s_q,\ + neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q,\ + neon_fp_reduc_minmax_s,\ + neon_fp_reduc_minmax_s_q,\ + neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_d_q,\ +\ + neon_fp_cvt_narrow_s_q,\ + neon_fp_cvt_narrow_d_q,\ + neon_fp_cvt_widen_h,\ + neon_fp_cvt_widen_s,\ +\ + neon_fp_to_int_s,\ + neon_fp_to_int_s_q,\ + neon_fp_to_int_d,\ + neon_fp_to_int_d_q,\ + neon_int_to_fp_s,\ + neon_int_to_fp_s_q,\ + neon_int_to_fp_d,\ + neon_int_to_fp_d_q,\ + neon_fp_round_s,\ + neon_fp_round_s_q,\ + neon_fp_round_d,\ + neon_fp_round_d_q,\ +\ + neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d,\ + neon_fp_recpe_d_q,\ + neon_fp_recps_s,\ + neon_fp_recps_s_q,\ + neon_fp_recps_d,\ + neon_fp_recps_d_q,\ + neon_fp_recpx_s,\ + neon_fp_recpx_s_q,\ + neon_fp_recpx_d,\ + neon_fp_recpx_d_q,\ +\ + neon_fp_rsqrte_s,\ + neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d,\ + neon_fp_rsqrte_d_q,\ + neon_fp_rsqrts_s,\ + neon_fp_rsqrts_s_q,\ + neon_fp_rsqrts_d,\ + neon_fp_rsqrts_d_q,\ +\ + neon_fp_mul_s,\ + neon_fp_mul_s_q,\ + neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q,\ + neon_fp_mul_d,\ + neon_fp_mul_d_q,\ + neon_fp_mul_d_scalar_q,\ +\ + neon_fp_mla_s,\ + neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar,\ + neon_fp_mla_s_scalar_q,\ + neon_fp_mla_d,\ + neon_fp_mla_d_q,\ + neon_fp_mla_d_scalar_q,\ +\ + neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,\ + neon_fp_sqrt_d_q,\ + neon_fp_div_s,\ + neon_fp_div_s_q,\ + neon_fp_div_d,\ + neon_fp_div_d_q,\ +\ + crypto_aes,\ + crypto_sha1_xor,\ + crypto_sha1_fast,\ + crypto_sha1_slow,\ + crypto_sha256_fast,\ + crypto_sha256_slow" + (const_string "untyped")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) + +; Is this an (integer side) multiply with a 64-bit result? +(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h new file mode 100644 index 000000000..b5055ce40 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h @@ -0,0 +1,67 @@ +/* Definitions for ARM EABI ucLinux + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Override settings that are different to the uclinux-elf or + bpabi defaults. 
*/ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE | MASK_INTERWORK) + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux_eabi -elf2flt" \ + " --pic-veneer --target2=abs" + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + register unsigned long _scno __asm ("r7") = 0xf0002; \ + __asm __volatile ("swi 0x0 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \ +} + +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_absptr diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h new file mode 100644 index 000000000..5cd4fe527 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h @@ -0,0 +1,84 @@ +/* Definitions for ARM running ucLinux using ELF + Copyright (C) 1999-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* We don't want a PLT. */ +#undef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE) + +/* NOTE: The remaining definitions in this file are needed because uclinux + does not use config/linux.h. */ + +/* Add GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +/* Do not assume anything about header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux" + +/* Now we define the strings used to build the spec file. 
*/ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt1%O%s crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef CC1_SPEC +#define CC1_SPEC "%{profile:-p}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G %L}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#undef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X -elf2flt" + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc-4.9/gcc/config/arm/unknown-elf.h b/gcc-4.9/gcc/config/arm/unknown-elf.h new file mode 100644 index 000000000..ec6f9a488 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/unknown-elf.h @@ -0,0 +1,96 @@ +/* Definitions for non-Linux based ARM systems using ELF + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. */ + +/* Default to using software floating point. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (0) +#endif + +/* Now we define the strings used to build the spec file. */ +#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC + +#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC UNKNOWN_ELF_ENDFILE_SPEC + +/* The __USES_INITFINI__ define is tested in newlib/libc/sys/arm/crt0.S + to see if it needs to invoked _init() and _fini(). */ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__USES_INITFINI__" + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Return a nonzero value if DECL has a section attribute. */ +#define IN_NAMED_SECTION_P(DECL) \ + ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ + && DECL_SECTION_NAME (DECL) != NULL_TREE) + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if (IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + \ + last_assemble_variable_decl = DECL; \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? 
(int)(SIZE) : 1); \ + } \ + while (0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if ((DECL) != NULL && IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ + } \ + while (0) + +#ifndef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm7tdmi +#endif + +/* The libgcc udivmod functions may throw exceptions. If newlib is + configured to support long longs in I/O, then printf will depend on + udivmoddi4, which will depend on the exception unwind routines, + which will depend on abort, which is defined in libc. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "--start-group %G %L --end-group" diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md new file mode 100644 index 000000000..8caa953bc --- /dev/null +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -0,0 +1,305 @@ +;; Unspec defintions. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; UNSPEC Usage: +;; Note: sin and cos are no-longer used. +;; Unspec enumerators for Neon are defined in neon.md. +;; Unspec enumerators for iwmmxt2 are defined in iwmmxt2.md + +(define_c_enum "unspec" [ + UNSPEC_PUSH_MULT ; `push multiple' operation: + ; operand 0 is the first register, + ; subsequent registers are in parallel (use ...) + ; expressions. + UNSPEC_PIC_SYM ; A symbol that has been treated properly for pic + ; usage, that is, we will add the pic_register + ; value to it before trying to dereference it. + UNSPEC_PIC_BASE ; Add PC and all but the last operand together, + ; The last operand is the number of a PIC_LABEL + ; that points at the containing instruction. + UNSPEC_PRLG_STK ; A special barrier that prevents frame accesses + ; being scheduled before the stack adjustment insn. + UNSPEC_REGISTER_USE ; As USE insns are not meaningful after reload, + ; this unspec is used to prevent the deletion of + ; instructions setting registers for EH handling + ; and stack frame generation. Operand 0 is the + ; register to "use". + UNSPEC_CHECK_ARCH ; Set CCs to indicate 26-bit or 32-bit mode. + UNSPEC_WSHUFH ; Used by the intrinsic form of the iWMMXt WSHUFH instruction. + UNSPEC_WACC ; Used by the intrinsic form of the iWMMXt WACC instruction. + UNSPEC_TMOVMSK ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction. + UNSPEC_WSAD ; Used by the intrinsic form of the iWMMXt WSAD instruction. + UNSPEC_WSADZ ; Used by the intrinsic form of the iWMMXt WSADZ instruction. + UNSPEC_WMACS ; Used by the intrinsic form of the iWMMXt WMACS instruction. 
+ UNSPEC_WMACU ; Used by the intrinsic form of the iWMMXt WMACU instruction. + UNSPEC_WMACSZ ; Used by the intrinsic form of the iWMMXt WMACSZ instruction. + UNSPEC_WMACUZ ; Used by the intrinsic form of the iWMMXt WMACUZ instruction. + UNSPEC_CLRDI ; Used by the intrinsic form of the iWMMXt CLRDI instruction. + UNSPEC_WALIGNI ; Used by the intrinsic form of the iWMMXt WALIGN instruction. + UNSPEC_TLS ; A symbol that has been treated properly for TLS usage. + UNSPEC_PIC_LABEL ; A label used for PIC access that does not appear in the + ; instruction stream. + UNSPEC_PIC_OFFSET ; A symbolic 12-bit OFFSET that has been treated + ; correctly for PIC usage. + UNSPEC_GOTSYM_OFF ; The offset of the start of the GOT from a + ; a given symbolic address. + UNSPEC_THUMB1_CASESI ; A Thumb1 compressed dispatch-table call. + UNSPEC_RBIT ; rbit operation. + UNSPEC_SYMBOL_OFFSET ; The offset of the start of the symbol from + ; another symbolic address. + UNSPEC_MEMORY_BARRIER ; Represent a memory barrier. + UNSPEC_UNALIGNED_LOAD ; Used to represent ldr/ldrh instructions that access + ; unaligned locations, on architectures which support + ; that. + UNSPEC_UNALIGNED_STORE ; Same for str/strh. + UNSPEC_PIC_UNIFIED ; Create a common pic addressing form. + UNSPEC_LL ; Represent an unpaired load-register-exclusive. + UNSPEC_VRINTZ ; Represent a float to integral float rounding + ; towards zero. + UNSPEC_VRINTP ; Represent a float to integral float rounding + ; towards +Inf. + UNSPEC_VRINTM ; Represent a float to integral float rounding + ; towards -Inf. + UNSPEC_VRINTR ; Represent a float to integral float rounding + ; FPSCR rounding mode. + UNSPEC_VRINTX ; Represent a float to integral float rounding + ; FPSCR rounding mode and signal inexactness. + UNSPEC_VRINTA ; Represent a float to integral float rounding + ; towards nearest, ties away from zero. +]) + +(define_c_enum "unspec" [ + UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction. + UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction. + UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction. + UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction. + UNSPEC_WQMULWM ; Used by the intrinsic form of the iWMMXt WQMULWM instruction. + UNSPEC_WQMULM ; Used by the intrinsic form of the iWMMXt WQMULM instruction. + UNSPEC_WQMIAxyn ; Used by the intrinsic form of the iWMMXt WMIAxyn instruction. + UNSPEC_WQMIAxy ; Used by the intrinsic form of the iWMMXt WMIAxy instruction. + UNSPEC_TANDC ; Used by the intrinsic form of the iWMMXt TANDC instruction. + UNSPEC_TORC ; Used by the intrinsic form of the iWMMXt TORC instruction. + UNSPEC_TORVSC ; Used by the intrinsic form of the iWMMXt TORVSC instruction. + UNSPEC_TEXTRC ; Used by the intrinsic form of the iWMMXt TEXTRC instruction. +]) + + +;; UNSPEC_VOLATILE Usage: + +(define_c_enum "unspecv" [ + VUNSPEC_BLOCKAGE ; `blockage' insn to prevent scheduling across an + ; insn in the code. + VUNSPEC_EPILOGUE ; `epilogue' insn, used to represent any part of the + ; instruction epilogue sequence that isn't expanded + ; into normal RTL. Used for both normal and sibcall + ; epilogues. + VUNSPEC_THUMB1_INTERWORK ; `prologue_thumb1_interwork' insn, used to swap + ; modes from arm to thumb. + VUNSPEC_ALIGN ; `align' insn. Used at the head of a minipool table + ; for inlined constants. + VUNSPEC_POOL_END ; `end-of-table'. Used to mark the end of a minipool + ; table. + VUNSPEC_POOL_1 ; `pool-entry(1)'. 
An entry in the constant pool for + ; an 8-bit object. + VUNSPEC_POOL_2 ; `pool-entry(2)'. An entry in the constant pool for + ; a 16-bit object. + VUNSPEC_POOL_4 ; `pool-entry(4)'. An entry in the constant pool for + ; a 32-bit object. + VUNSPEC_POOL_8 ; `pool-entry(8)'. An entry in the constant pool for + ; a 64-bit object. + VUNSPEC_POOL_16 ; `pool-entry(16)'. An entry in the constant pool for + ; a 128-bit object. + VUNSPEC_TMRC ; Used by the iWMMXt TMRC instruction. + VUNSPEC_TMCR ; Used by the iWMMXt TMCR instruction. + VUNSPEC_ALIGN8 ; 8-byte alignment version of VUNSPEC_ALIGN + VUNSPEC_WCMP_EQ ; Used by the iWMMXt WCMPEQ instructions + VUNSPEC_WCMP_GTU ; Used by the iWMMXt WCMPGTU instructions + VUNSPEC_WCMP_GT ; Used by the iwMMXT WCMPGT instructions + VUNSPEC_EH_RETURN ; Use to override the return address for exception + ; handling. + VUNSPEC_ATOMIC_CAS ; Represent an atomic compare swap. + VUNSPEC_ATOMIC_XCHG ; Represent an atomic exchange. + VUNSPEC_ATOMIC_OP ; Represent an atomic operation. + VUNSPEC_LL ; Represent a load-register-exclusive. + VUNSPEC_SC ; Represent a store-register-exclusive. + VUNSPEC_LAX ; Represent a load-register-acquire-exclusive. + VUNSPEC_SLX ; Represent a store-register-release-exclusive. + VUNSPEC_LDA ; Represent a store-register-acquire. + VUNSPEC_STL ; Represent a store-register-release. +]) + +;; Enumerators for NEON unspecs. +(define_c_enum "unspec" [ + UNSPEC_ASHIFT_SIGNED + UNSPEC_ASHIFT_UNSIGNED + UNSPEC_CRC32B + UNSPEC_CRC32H + UNSPEC_CRC32W + UNSPEC_CRC32CB + UNSPEC_CRC32CH + UNSPEC_CRC32CW + UNSPEC_AESD + UNSPEC_AESE + UNSPEC_AESIMC + UNSPEC_AESMC + UNSPEC_SHA1C + UNSPEC_SHA1M + UNSPEC_SHA1P + UNSPEC_SHA1H + UNSPEC_SHA1SU0 + UNSPEC_SHA1SU1 + UNSPEC_SHA256H + UNSPEC_SHA256H2 + UNSPEC_SHA256SU0 + UNSPEC_SHA256SU1 + UNSPEC_VMULLP64 + UNSPEC_LOAD_COUNT + UNSPEC_VABD + UNSPEC_VABDL + UNSPEC_VADD + UNSPEC_VADDHN + UNSPEC_VADDL + UNSPEC_VADDW + UNSPEC_VBSL + UNSPEC_VCAGE + UNSPEC_VCAGT + UNSPEC_VCEQ + UNSPEC_VCGE + UNSPEC_VCGEU + UNSPEC_VCGT + UNSPEC_VCGTU + UNSPEC_VCLS + UNSPEC_VCONCAT + UNSPEC_VCVT + UNSPEC_VCVT_N + UNSPEC_VEXT + UNSPEC_VHADD + UNSPEC_VHSUB + UNSPEC_VLD1 + UNSPEC_VLD1_LANE + UNSPEC_VLD2 + UNSPEC_VLD2_DUP + UNSPEC_VLD2_LANE + UNSPEC_VLD3 + UNSPEC_VLD3A + UNSPEC_VLD3B + UNSPEC_VLD3_DUP + UNSPEC_VLD3_LANE + UNSPEC_VLD4 + UNSPEC_VLD4A + UNSPEC_VLD4B + UNSPEC_VLD4_DUP + UNSPEC_VLD4_LANE + UNSPEC_VMAX + UNSPEC_VMIN + UNSPEC_VMLA + UNSPEC_VMLAL + UNSPEC_VMLA_LANE + UNSPEC_VMLAL_LANE + UNSPEC_VMLS + UNSPEC_VMLSL + UNSPEC_VMLS_LANE + UNSPEC_VMLSL_LANE + UNSPEC_VMOVL + UNSPEC_VMOVN + UNSPEC_VMUL + UNSPEC_VMULL + UNSPEC_VMUL_LANE + UNSPEC_VMULL_LANE + UNSPEC_VPADAL + UNSPEC_VPADD + UNSPEC_VPADDL + UNSPEC_VPMAX + UNSPEC_VPMIN + UNSPEC_VPSMAX + UNSPEC_VPSMIN + UNSPEC_VPUMAX + UNSPEC_VPUMIN + UNSPEC_VQABS + UNSPEC_VQADD + UNSPEC_VQDMLAL + UNSPEC_VQDMLAL_LANE + UNSPEC_VQDMLSL + UNSPEC_VQDMLSL_LANE + UNSPEC_VQDMULH + UNSPEC_VQDMULH_LANE + UNSPEC_VQDMULL + UNSPEC_VQDMULL_LANE + UNSPEC_VQMOVN + UNSPEC_VQMOVUN + UNSPEC_VQNEG + UNSPEC_VQSHL + UNSPEC_VQSHL_N + UNSPEC_VQSHLU_N + UNSPEC_VQSHRN_N + UNSPEC_VQSHRUN_N + UNSPEC_VQSUB + UNSPEC_VRECPE + UNSPEC_VRECPS + UNSPEC_VREV16 + UNSPEC_VREV32 + UNSPEC_VREV64 + UNSPEC_VRSQRTE + UNSPEC_VRSQRTS + UNSPEC_VSHL + UNSPEC_VSHLL_N + UNSPEC_VSHL_N + UNSPEC_VSHR_N + UNSPEC_VSHRN_N + UNSPEC_VSLI + UNSPEC_VSRA_N + UNSPEC_VSRI + UNSPEC_VST1 + UNSPEC_VST1_LANE + UNSPEC_VST2 + UNSPEC_VST2_LANE + UNSPEC_VST3 + UNSPEC_VST3A + UNSPEC_VST3B + UNSPEC_VST3_LANE + UNSPEC_VST4 + UNSPEC_VST4A + UNSPEC_VST4B + UNSPEC_VST4_LANE 
+ UNSPEC_VSTRUCTDUMMY + UNSPEC_VSUB + UNSPEC_VSUBHN + UNSPEC_VSUBL + UNSPEC_VSUBW + UNSPEC_VTBL + UNSPEC_VTBX + UNSPEC_VTRN1 + UNSPEC_VTRN2 + UNSPEC_VTST + UNSPEC_VUZP1 + UNSPEC_VUZP2 + UNSPEC_VZIP1 + UNSPEC_VZIP2 + UNSPEC_MISALIGNED_ACCESS + UNSPEC_VCLE + UNSPEC_VCLT + UNSPEC_NVRINTZ + UNSPEC_NVRINTP + UNSPEC_NVRINTM + UNSPEC_NVRINTX + UNSPEC_NVRINTA + UNSPEC_NVRINTN +]) + diff --git a/gcc-4.9/gcc/config/arm/vec-common.md b/gcc-4.9/gcc/config/arm/vec-common.md new file mode 100644 index 000000000..ba0b58806 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vec-common.md @@ -0,0 +1,136 @@ +;; Machine Description for shared bits common to IWMMXT and Neon. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Vector Moves + +(define_expand "mov" + [(set (match_operand:VALL 0 "nonimmediate_operand" "") + (match_operand:VALL 1 "general_operand" ""))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + else if (TARGET_NEON && CONSTANT_P (operands[1])) + { + operands[1] = neon_make_constant (operands[1]); + gcc_assert (operands[1] != NULL_RTX); + } + } +}) + +;; Vector arithmetic. Expanders are blank, then unnamed insns implement +;; patterns separately for IWMMXT and Neon. 
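+;; As an illustrative sketch (simplified, and not an exact copy of the
+;; pattern in neon.md), the RTL produced by the blank "add<mode>3"
+;; expander below is matched elsewhere by an unnamed insn along these
+;; lines:
+;;
+;;   (define_insn "*add<mode>3_neon"
+;;     [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+;;           (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+;;                     (match_operand:VDQ 2 "s_register_operand" "w")))]
+;;     "TARGET_NEON"
+;;     "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+;;     [(set_attr "type" "neon_add<q>")])
+;;
+;; The corresponding iWMMXt patterns live in iwmmxt.md and are enabled by
+;; the TARGET_REALLY_IWMMXT half of each expander's condition.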
+ +(define_expand "add3" + [(set (match_operand:VALL 0 "s_register_operand" "") + (plus:VALL (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "sub3" + [(set (match_operand:VALL 0 "s_register_operand" "") + (minus:VALL (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "mul3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (mult:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (mode == V4HImode && TARGET_REALLY_IWMMXT)" +{ +}) + +(define_expand "smin3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (smin:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "umin3" + [(set (match_operand:VINTW 0 "s_register_operand" "") + (umin:VINTW (match_operand:VINTW 1 "s_register_operand" "") + (match_operand:VINTW 2 "s_register_operand" "")))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "smax3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (smax:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "umax3" + [(set (match_operand:VINTW 0 "s_register_operand" "") + (umax:VINTW (match_operand:VINTW 1 "s_register_operand" "") + (match_operand:VINTW 2 "s_register_operand" "")))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "vec_perm_const" + [(match_operand:VALL 0 "s_register_operand" "") + (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "") + (match_operand: 3 "" "")] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ + if (arm_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "vec_perm" + [(match_operand:VE 0 "s_register_operand" "") + (match_operand:VE 1 "s_register_operand" "") + (match_operand:VE 2 "s_register_operand" "") + (match_operand:VE 3 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]); + DONE; +}) diff --git a/gcc-4.9/gcc/config/arm/vfp.md b/gcc-4.9/gcc/config/arm/vfp.md new file mode 100644 index 000000000..e1a48eeea --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vfp.md @@ -0,0 +1,1330 @@ +;; ARM VFP instruction patterns +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; SImode moves +;; ??? For now do not allow loading constants into vfp regs. This causes +;; problems because small constants get converted into adds. +(define_insn "*arm_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] + "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: case 1: + return \"mov%?\\t%0, %1\"; + case 2: + return \"mvn%?\\t%0, #%B1\"; + case 3: + return \"movw%?\\t%0, %1\"; + case 4: + return \"ldr%?\\t%0, %1\"; + case 5: + return \"str%?\\t%1, %0\"; + case 6: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 7: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 8: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 9: case 10: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] +) + +;; See thumb2.md:thumb2_movsi_insn for an explanation of the split +;; high/low register alternatives for loads and stores here. +;; The l/Py alternative should come after r/I to ensure that the short variant +;; is chosen with length 2 when the instruction is predicated for +;; arm_restrict_it. 
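+;; As an illustrative example of the point above: with -mrestrict-it an
+;; IT block is limited to a single 16-bit instruction, so a predicated
+;; move such as
+;;   it	eq
+;;   moveq	r0, #1		@ 16-bit encoding, l/Py alternative
+;; must come out in its 2-byte form, and ordering the alternatives this
+;; way lets the length attribute reflect that.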
+(define_insn "*thumb2_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"mov%?\\t%0, %1\"; + case 3: + return \"mvn%?\\t%0, #%B1\"; + case 4: + return \"movw%?\\t%0, %1\"; + case 5: + case 6: + return \"ldr%?\\t%0, %1\"; + case 7: + case 8: + return \"str%?\\t%1, %0\"; + case 9: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 10: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 11: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 12: case 13: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] +) + + +;; DImode moves + +(define_insn "*movdi_vfp" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)) + && !(TARGET_NEON && CONST_INT_P (operands[1]) + && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + case 3: + return \"#\"; + case 4: + case 5: + case 6: + return output_move_double (operands, true, NULL); + case 7: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 8: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 9: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 10: case 11: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8) + (eq_attr "alternative" "2") (const_int 12) + (eq_attr "alternative" "3") (const_int 16) + (eq_attr "alternative" "9") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "arm_pool_range" "*,*,*,*,1020,4096,*,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*") + (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] +) + +(define_insn "*movdi_vfp_cortexa8" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune == cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)) + && !(TARGET_NEON && CONST_INT_P (operands[1]) + && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))" + "* + switch (which_alternative) + { + case 0: 
+ case 1: + case 2: + case 3: + return \"#\"; + case 4: + case 5: + case 6: + return output_move_double (operands, true, NULL); + case 7: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 8: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 9: + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 10: case 11: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8) + (eq_attr "alternative" "2") (const_int 12) + (eq_attr "alternative" "3") (const_int 16) + (eq_attr "alternative" "4,5,6") + (symbol_ref + "arm_count_output_move_double_insns (operands) \ + * 4")] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "arm_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*") + (set (attr "ce_count") + (symbol_ref "get_attr_length (insn) / 4")) + (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] + ) + +;; HFmode moves +(define_insn "*movhf_vfp_neon" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "neon_load1_1reg,neon_store1_1reg,\ + load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + +;; FP16 without element load/store instructions. 
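+;; Both HFmode move patterns materialize a __fp16 constant through a core
+;; register by splitting the 16-bit image of the value.  As a worked
+;; example (register and value chosen for illustration only): the
+;; constant 3.140625 has the half-precision bit pattern 0x4248, giving
+;;   movw	r0, #16968		@ Thumb-2
+;; or, without movw,
+;;   mov	r0, #16896		@ 0x4200
+;;   orr	r0, r0, #72		@ 0x0048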
+(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 2: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 3: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 4: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 5: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 6: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") + (set_attr "length" "4,4,4,4,4,4,8")] +) + + +;; SFmode moves +;; Disparage the w<->r cases because reloading an invalid address is +;; preferable to loading the value via integer registers. + +(define_insn "*movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") + (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] +) + +(define_insn "*thumb2_movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" + 
"f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") + (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] +) + +;; DFmode moves + +(define_insn "*movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: case 6: + return output_move_double (operands, true, NULL); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + case 8: + return \"#\"; + default: + gcc_unreachable (); + } + } + " + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ + load2,store2,ffarithd,multiple") + (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")] +) + +(define_insn "*thumb2_movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: case 6: case 8: + return output_move_double (operands, true, NULL); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + default: + abort (); + } + } + " + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ + f_stored,load2,store2,ffarithd,multiple") + (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] +) + + +;; Conditional move patterns + +(define_insn "*movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcpys%D3\\t%0, %2 + fcpys%d3\\t%0, %1 + fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + fmsr%D3\\t%0, %2 + fmsr%d3\\t%0, %1 + fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + fmrs%D3\\t%0, %2 + fmrs%d3\\t%0, %1 + fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") + (set_attr "type" 
"fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] +) + +(define_insn "*thumb2_movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" + "@ + it\\t%D3\;fcpys%D3\\t%0, %2 + it\\t%d3\;fcpys%d3\\t%0, %1 + ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + it\\t%D3\;fmsr%D3\\t%0, %2 + it\\t%d3\;fmsr%d3\\t%0, %1 + ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + it\\t%D3\;fmrs%D3\\t%0, %2 + it\\t%d3\;fmrs%d3\\t%0, %1 + ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] +) + +(define_insn "*movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcpyd%D3\\t%P0, %P2 + fcpyd%d3\\t%P0, %P1 + fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + fmdrr%D3\\t%P0, %Q2, %R2 + fmdrr%d3\\t%P0, %Q1, %R1 + fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + fmrrd%D3\\t%Q0, %R0, %P2 + fmrrd%d3\\t%Q0, %R0, %P1 + fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] +) + +(define_insn "*thumb2_movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" + "@ + it\\t%D3\;fcpyd%D3\\t%P0, %P2 + it\\t%d3\;fcpyd%d3\\t%P0, %P1 + ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2 + it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1 + ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2 + it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1 + ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] +) + + +;; Sign manipulation functions + +(define_insn "*abssf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fabss%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffariths")] +) + +(define_insn "*absdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fabsd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" 
"ffarithd")] +) + +(define_insn "*negsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,?r") + (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fnegs%?\\t%0, %1 + eor%?\\t%0, %1, #-2147483648" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffariths")] +) + +(define_insn_and_split "*negdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r") + (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fnegd%?\\t%P0, %P1 + # + #" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed + && arm_general_register_operand (operands[0], DFmode)" + [(set (match_dup 0) (match_dup 1))] + " + if (REGNO (operands[0]) == REGNO (operands[1])) + { + operands[0] = gen_highpart (SImode, operands[0]); + operands[1] = gen_rtx_XOR (SImode, operands[0], GEN_INT (0x80000000)); + } + else + { + rtx in_hi, in_lo, out_hi, out_lo; + + in_hi = gen_rtx_XOR (SImode, gen_highpart (SImode, operands[1]), + GEN_INT (0x80000000)); + in_lo = gen_lowpart (SImode, operands[1]); + out_hi = gen_highpart (SImode, operands[0]); + out_lo = gen_lowpart (SImode, operands[0]); + + if (REGNO (in_lo) == REGNO (out_hi)) + { + emit_insn (gen_rtx_SET (SImode, out_lo, in_lo)); + operands[0] = out_hi; + operands[1] = in_hi; + } + else + { + emit_insn (gen_rtx_SET (SImode, out_hi, in_hi)); + operands[0] = out_lo; + operands[1] = in_lo; + } + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4,4,8") + (set_attr "type" "ffarithd")] +) + + +;; Arithmetic insns + +(define_insn "*addsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fadds")] +) + +(define_insn "*adddf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "faddd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "faddd")] +) + + +(define_insn "*subsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsubs%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fadds")] +) + +(define_insn "*subdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsubd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "faddd")] +) + + +;; Division insns + +(define_insn "*divsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (div:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fdivs%?\\t%0, %1, %2" + [(set_attr "predicable" 
"yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fdivs")] +) + +(define_insn "*divdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (div:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fdivd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fdivd")] +) + + +;; Multiplication insns + +(define_insn "*mulsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (mult:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuld")] +) + +(define_insn "*mulsf3negsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3negdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuld")] +) + + +;; Multiply-accumulate insns + +;; 0 = 1 * 2 + 0 +(define_insn "*mulsf3addsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3adddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; 0 = 1 * 2 - 0 +(define_insn "*mulsf3subsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3subdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF 
(match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; 0 = -(1 * 2) + 0 +(define_insn "*mulsf3negsfaddsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "0") + (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*fmuldf3negdfadddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "0") + (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + + +;; 0 = -(1 * 2) - 0 +(define_insn "*mulsf3negsfsubsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF + (neg:SF (match_operand:SF 2 "s_register_operand" "t")) + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3negdfsubdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF + (neg:DF (match_operand:DF 2 "s_register_operand" "w")) + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; Fused-multiply-accumulate + +(define_insn "fma4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfma%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fmsub4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "")) + (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfms%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fnmsub4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "") + (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfnms%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fnmadd4" + [(set (match_operand:SDF 0 
"register_operand" "=") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "")) + (match_operand:SDF 2 "register_operand" "") + (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfnma%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + + +;; Conversion routines + +(define_insn "*extendsfdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtds%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncdfsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtsd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncsisf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftosizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + +(define_insn "*truncsidf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftosizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftouizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + +(define_insn "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftouizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + + +(define_insn "*floatsisf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +(define_insn "*floatsidf2_vfp" + [(set 
(match_operand:DF 0 "s_register_operand" "=w") + (float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fuitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fuitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + + +;; Sqrt insns. + +(define_insn "*sqrtsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsqrts%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fsqrts")] +) + +(define_insn "*sqrtdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsqrtd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fsqrtd")] +) + + +;; Patterns to split/copy vfp condition flags. + +(define_insn "*movcc_vfp" + [(set (reg CC_REGNUM) + (reg VFPCC_REGNUM))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmstat%?" 
+ [(set_attr "conds" "set") + (set_attr "type" "f_flag")] +) + +(define_insn_and_split "*cmpsf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpsf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + + +;; Comparison patterns + +(define_insn "*cmpsf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmps%?\\t%0, %1 + fcmpzs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpsf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmpes%?\\t%0, %1 + fcmpezs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpdf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmpd%?\\t%P0, %P1 + fcmpzd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmpd")] +) + +(define_insn "*cmpdf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmped%?\\t%P0, %P1 + fcmpezd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmpd")] +) + +;; Fixed point to floating point conversions. 
+(define_code_iterator FCVT [unsigned_float float]) +(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) + +(define_insn "*combine_vcvt_f32_" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) + (match_operand 2 + "const_double_vcvt_power_of_two_reciprocal" "Dt")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" + "vcvt%?.f32.\\t%0, %1, %v2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +;; Not the ideal way of implementing this. Ideally we would be able to split +;; this into a move to a DP register and then a vcvt.f64.i32 +(define_insn "*combine_vcvt_f64_" + [(set (match_operand:DF 0 "s_register_operand" "=x,x,w") + (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r")) + (match_operand 2 + "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math + && !TARGET_VFP_SINGLE" + "@ + vmov%?.f32\\t%0, %1\;vcvt%?.f64.\\t%P0, %P0, %v2 + vmov%?.f32\\t%0, %1\;vcvt%?.f64.\\t%P0, %P0, %v2 + vmov%?.f64\\t%P0, %1, %1\;vcvt%?.f64.\\t%P0, %P0, %v2" + [(set_attr "predicable" "yes") + (set_attr "ce_count" "2") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f") + (set_attr "length" "8")] +) + +(define_insn "*combine_vcvtf2i" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand 2 + "const_double_vcvt_power_of_two" "Dp")))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" + "vcvt%?.s32.f32\\t%1, %1, %v2\;vmov%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "ce_count" "2") + (set_attr "type" "f_cvtf2i") + (set_attr "length" "8")] + ) + +;; Store multiple insn used in function prologue. +(define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "* return vfp_output_fstmd (operands);" + [(set_attr "type" "f_stored")] +) + +;; VRINT round to integral instructions. +;; Invoked for the patterns: btruncsf2, btruncdf2, ceilsf2, ceildf2, +;; roundsf2, rounddf2, floorsf2, floordf2, nearbyintsf2, nearbyintdf2, +;; rintsf2, rintdf2. +(define_insn "2" + [(set (match_operand:SDF 0 "register_operand" "=") + (unspec:SDF [(match_operand:SDF 1 + "register_operand" "")] + VRINT))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vrint%?.\\t%0, %1" + [(set_attr "predicable" "") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_rint") + (set_attr "conds" "")] +) + +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. +;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. 
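+
+;; Illustration (an assumption added for exposition, not part of the
+;; original port): with an ARMv8 FPU and flags that allow MIN_EXPR/
+;; MAX_EXPR here (e.g. -ffast-math, or -ffinite-math-only together with
+;; -fno-signed-zeros), a plain C ternary such as
+;;
+;;   double dmax (double a, double b) { return a > b ? a : b; }
+;;
+;; may be recognised as MAX_EXPR and matched by the smax pattern
+;; immediately below, yielding a single vmaxnm.f64; without those flags
+;; the compare-and-branch form is kept so that NaN and signed-zero
+;; semantics stay IEEE-correct.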
+ +(define_insn "smax3" + [(set (match_operand:SDF 0 "register_operand" "=") + (smax:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vmaxnm.\\t%0, %1, %2" + [(set_attr "type" "f_minmax") + (set_attr "conds" "unconditional")] +) + +(define_insn "smin3" + [(set (match_operand:SDF 0 "register_operand" "=") + (smin:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vminnm.\\t%0, %1, %2" + [(set_attr "type" "f_minmax") + (set_attr "conds" "unconditional")] +) + +;; Unimplemented insns: +;; fldm* +;; fstm* +;; fmdhr et al (VFPv1) +;; Support for xD (single precision only) variants. +;; fmrrs, fmsrr diff --git a/gcc-4.9/gcc/config/arm/vfp11.md b/gcc-4.9/gcc/config/arm/vfp11.md new file mode 100644 index 000000000..2dbb20100 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vfp11.md @@ -0,0 +1,93 @@ +;; ARM VFP11 pipeline description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "vfp11") + +;; There are 3 pipelines in the VFP11 unit. +;; +;; - A 8-stage FMAC pipeline (7 execute + writeback) with forward from +;; fourth stage for simple operations. +;; +;; - A 5-stage DS pipeline (4 execute + writeback) for divide/sqrt insns. +;; These insns also uses first execute stage of FMAC pipeline. +;; +;; - A 4-stage LS pipeline (execute + 2 memory + writeback) with forward from +;; second memory stage for loads. + +;; We do not model Write-After-Read hazards. +;; We do not do write scheduling with the arm core, so it is only necessary +;; to model the first stage of each pipeline +;; ??? Need to model LS pipeline properly for load/store multiple? +;; We do not model fmstat properly. This could be done by modeling pipelines +;; properly and defining an absence set between a dummy fmstat unit and all +;; other vfp units. 
+ +(define_cpu_unit "fmac" "vfp11") + +(define_cpu_unit "ds" "vfp11") + +(define_cpu_unit "vfp_ls" "vfp11") + +(define_cpu_unit "fmstat" "vfp11") + +(exclusion_set "fmac,ds" "fmstat") + +(define_insn_reservation "vfp_ffarith" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) + "fmac") + +(define_insn_reservation "vfp_farith" 8 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,f_cvtf2i,f_cvti2f,\ + fmuls,fmacs,ffmas")) + "fmac") + +(define_insn_reservation "vfp_fmul" 9 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fmuld,fmacd,ffmad")) + "fmac*2") + +(define_insn_reservation "vfp_fdivs" 19 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "ds*15") + +(define_insn_reservation "vfp_fdivd" 33 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "fmac+ds*29") + +;; Moves to/from arm regs also use the load/store pipeline. +(define_insn_reservation "vfp_fload" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_loads,f_loadd,f_mcr,f_mcrr")) + "vfp_ls") + +(define_insn_reservation "vfp_fstore" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_stores,f_stored,f_mrc,f_mrrc")) + "vfp_ls") + +(define_insn_reservation "vfp_to_cpsr" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_flag")) + "fmstat,vfp_ls*3") + diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h new file mode 100644 index 000000000..8bef16bc4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vxworks.h @@ -0,0 +1,109 @@ +/* Definitions of target machine for GCC, + for ARM with targeting the VXWorks run time environment. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + + Contributed by: Mike Stump + Brought up to date by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + if (TARGET_BIG_END) \ + builtin_define ("ARMEB"); \ + else \ + builtin_define ("ARMEL"); \ + \ + if (arm_arch_xscale) \ + builtin_define ("CPU=XSCALE"); \ + else if (arm_arch5) \ + builtin_define ("CPU=ARMARCH5"); \ + else if (arm_arch4) \ + { \ + if (thumb_code) \ + builtin_define ("CPU=ARMARCH4_T"); \ + else \ + builtin_define ("CPU=ARMARCH4"); \ + } \ + VXWORKS_OS_CPP_BUILTINS (); \ + } while (0) + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS + +/* Subsume the arm/elf.h definition, and add RTP hooks. 
*/ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" VXWORKS_ADDITIONAL_CPP_SPEC + +#undef CC1_SPEC +#define CC1_SPEC \ +"%{tstrongarm:-mlittle-endian -mcpu=strongarm ; \ + t4: -mlittle-endian -march=armv4 ; \ + t4be: -mbig-endian -march=armv4 ; \ + t4t: -mthumb -mthumb-interwork -mlittle-endian -march=armv4t ; \ + t4tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv4t ; \ + t5: -mlittle-endian -march=armv5 ; \ + t5be: -mbig-endian -march=armv5 ; \ + t5t: -mthumb -mthumb-interwork -mlittle-endian -march=armv5 ; \ + t5tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv5 ; \ + txscale: -mlittle-endian -mcpu=xscale ; \ + txscalebe: -mbig-endian -mcpu=xscale ; \ + : -march=armv4}" + +/* Pass -EB for big-endian targets. */ +#define VXWORKS_ENDIAN_SPEC \ + "%{mbig-endian|t4be|t4tbe|t5be|t5tbe|txscalebe:-EB}" + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC VXWORKS_ENDIAN_SPEC + +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_ENDIAN_SPEC + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +/* There is no default multilib. */ +#undef MULTILIB_DEFAULTS + +#define FPUTYPE_DEFAULT "vfp" + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER + +/* We want to be compatible with a version of "2.96" at one point in + the past before this macro was changed. */ +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* The kernel loader does not allow relocations to overflow, so we + cannot allow arbitrary relocation addends in kernel modules or RTP + executables. Also, the dynamic loader uses the resolved relocation + value to distinguish references to the text and data segments, so we + cannot allow arbitrary offsets for shared libraries either. */ +#undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc-4.9/gcc/config/arm/vxworks.opt b/gcc-4.9/gcc/config/arm/vxworks.opt new file mode 100644 index 000000000..ae83422f8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vxworks.opt @@ -0,0 +1,59 @@ +; ARM VxWorks options. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +t4 +Driver + +t4be +Driver + +t4t +Driver + +t4tbe +Driver + +t5 +Driver + +t5be +Driver + +t5t +Driver + +t5tbe +Driver + +tstrongarm +Driver + +txscale +Driver + +txscalebe +Driver + +; This comment is to ensure we retain the blank line above. 
diff --git a/gcc-4.9/gcc/config/arm/x-arm b/gcc-4.9/gcc/config/arm/x-arm new file mode 100644 index 000000000..51cff1ed4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/x-arm @@ -0,0 +1,3 @@ +driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc-4.9/gcc/config/avr/avr-arch.h b/gcc-4.9/gcc/config/avr/avr-arch.h new file mode 100644 index 000000000..6357e997c --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-arch.h @@ -0,0 +1,156 @@ +/* Definitions of types that are used to store AVR architecture and + device information. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* This enum supplies indices into the avr_arch_types[] table below. */ + +enum avr_arch +{ + ARCH_UNKNOWN, + ARCH_AVR1, + ARCH_AVR2, + ARCH_AVR25, + ARCH_AVR3, + ARCH_AVR31, + ARCH_AVR35, + ARCH_AVR4, + ARCH_AVR5, + ARCH_AVR51, + ARCH_AVR6, + ARCH_AVRXMEGA2, + ARCH_AVRXMEGA4, + ARCH_AVRXMEGA5, + ARCH_AVRXMEGA6, + ARCH_AVRXMEGA7 +}; + + +/* Architecture-specific properties. */ + +typedef struct +{ + /* Assembler only. */ + int asm_only; + + /* Core have 'MUL*' instructions. */ + int have_mul; + + /* Core have 'CALL' and 'JMP' instructions. */ + int have_jmp_call; + + /* Core have 'MOVW' and 'LPM Rx,Z' instructions. */ + int have_movw_lpmx; + + /* Core have 'ELPM' instructions. */ + int have_elpm; + + /* Core have 'ELPM Rx,Z' instructions. */ + int have_elpmx; + + /* Core have 'EICALL' and 'EIJMP' instructions. */ + int have_eijmp_eicall; + + /* This is an XMEGA core. */ + int xmega_p; + + /* This core has the RAMPD special function register + and thus also the RAMPX, RAMPY and RAMPZ registers. */ + int have_rampd; + + /* Default start of data section address for architecture. */ + int default_data_section_start; + + /* Offset between SFR address and RAM address: + SFR-address = RAM-address - sfr_offset */ + int sfr_offset; + + /* Architecture id to built-in define __AVR_ARCH__ (NULL -> no macro) */ + const char *const macro; + + /* Architecture name. */ + const char *const arch_name; +} avr_arch_t; + + +/* Device-specific properties. */ + +typedef struct +{ + /* Device name. */ + const char *const name; + + /* Index in avr_arch_types[]. */ + enum avr_arch arch; + + /* Must lie outside user's namespace. NULL == no macro. */ + const char *const macro; + + /* Stack pointer have 8 bits width. */ + int short_sp; + + /* Some AVR devices have a core erratum when skipping a 2-word instruction. + Skip instructions are: SBRC, SBRS, SBIC, SBIS, CPSE. + Problems will occur with return address is IRQ executes during the + skip sequence. 
+ + A support ticket from Atmel returned the following information: + + Subject: (ATTicket:644469) On AVR skip-bug core Erratum + From: avr@atmel.com Date: 2011-07-27 + (Please keep the subject when replying to this mail) + + This errata exists only in AT90S8515 and ATmega103 devices. + + For information please refer the following respective errata links + http://www.atmel.com/dyn/resources/prod_documents/doc2494.pdf + http://www.atmel.com/dyn/resources/prod_documents/doc1436.pdf */ + + /* Core Erratum: Must not skip 2-word instruction. */ + int errata_skip; + + /* Start of data section. */ + int data_section_start; + + /* Number of 64k segments in the flash. */ + int n_flash; + + /* Name of device library. */ + const char *const library_name; +} avr_mcu_t; + +/* Map architecture to its texinfo string. */ + +typedef struct +{ + /* Architecture ID. */ + enum avr_arch arch; + + /* textinfo source to describe the archtiecture. */ + const char *texinfo; +} avr_arch_info_t; + +/* Preprocessor macros to define depending on MCU type. */ + +extern const avr_arch_t avr_arch_types[]; +extern const avr_arch_t *avr_current_arch; + +extern const avr_mcu_t avr_mcu_types[]; +extern const avr_mcu_t *avr_current_device; diff --git a/gcc-4.9/gcc/config/avr/avr-c.c b/gcc-4.9/gcc/config/avr/avr-c.c new file mode 100644 index 000000000..101d28092 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-c.c @@ -0,0 +1,402 @@ +/* Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Not included in avr.c since this requires C front end. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "cpplib.h" +#include "tree.h" +#include "stor-layout.h" +#include "target.h" +#include "c-family/c-common.h" +#include "langhooks.h" + + +/* IDs for all the AVR builtins. */ + +enum avr_builtin_id + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + AVR_BUILTIN_ ## NAME, +#include "builtins.def" +#undef DEF_BUILTIN + + AVR_BUILTIN_COUNT + }; + + +/* Implement `TARGET_RESOLVE_OVERLOADED_PLUGIN'. 
*/ + +static tree +avr_resolve_overloaded_builtin (unsigned int iloc, tree fndecl, void *vargs) +{ + tree type0, type1, fold = NULL_TREE; + enum avr_builtin_id id = AVR_BUILTIN_COUNT; + location_t loc = (location_t) iloc; + vec &args = * (vec*) vargs; + + switch (DECL_FUNCTION_CODE (fndecl)) + { + default: + break; + + case AVR_BUILTIN_ABSFX: + if (args.length() != 1) + { + error_at (loc, "%qs expects 1 argument but %d given", + "absfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + + if (!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as argument", + "absfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_ABSHR; break; + case HQmode: id = AVR_BUILTIN_ABSR; break; + case SQmode: id = AVR_BUILTIN_ABSLR; break; + case DQmode: id = AVR_BUILTIN_ABSLLR; break; + + case HAmode: id = AVR_BUILTIN_ABSHK; break; + case SAmode: id = AVR_BUILTIN_ABSK; break; + case DAmode: id = AVR_BUILTIN_ABSLK; break; + case TAmode: id = AVR_BUILTIN_ABSLLK; break; + + case UQQmode: + case UHQmode: + case USQmode: + case UDQmode: + case UHAmode: + case USAmode: + case UDAmode: + case UTAmode: + warning_at (loc, 0, "using %qs with unsigned type has no effect", + "absfx"); + return args[0]; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "absfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // absfx + + case AVR_BUILTIN_ROUNDFX: + if (args.length() != 2) + { + error_at (loc, "%qs expects 2 arguments but %d given", + "roundfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + type1 = TREE_TYPE (args[1]); + + if (!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as first argument", + "roundfx"); + + fold = error_mark_node; + } + + if (!INTEGRAL_TYPE_P (type1)) + { + error_at (loc, "%qs expects an integer value as second argument", + "roundfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_ROUNDHR; break; + case HQmode: id = AVR_BUILTIN_ROUNDR; break; + case SQmode: id = AVR_BUILTIN_ROUNDLR; break; + case DQmode: id = AVR_BUILTIN_ROUNDLLR; break; + + case UQQmode: id = AVR_BUILTIN_ROUNDUHR; break; + case UHQmode: id = AVR_BUILTIN_ROUNDUR; break; + case USQmode: id = AVR_BUILTIN_ROUNDULR; break; + case UDQmode: id = AVR_BUILTIN_ROUNDULLR; break; + + case HAmode: id = AVR_BUILTIN_ROUNDHK; break; + case SAmode: id = AVR_BUILTIN_ROUNDK; break; + case DAmode: id = AVR_BUILTIN_ROUNDLK; break; + case TAmode: id = AVR_BUILTIN_ROUNDLLK; break; + + case UHAmode: id = AVR_BUILTIN_ROUNDUHK; break; + case USAmode: id = AVR_BUILTIN_ROUNDUK; break; + case UDAmode: id = AVR_BUILTIN_ROUNDULK; break; + case UTAmode: id = AVR_BUILTIN_ROUNDULLK; break; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "roundfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // roundfx + + case AVR_BUILTIN_COUNTLSFX: + if (args.length() != 1) + { + error_at (loc, "%qs expects 1 argument but %d given", + "countlsfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + + if 
(!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as first argument", + "countlsfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_COUNTLSHR; break; + case HQmode: id = AVR_BUILTIN_COUNTLSR; break; + case SQmode: id = AVR_BUILTIN_COUNTLSLR; break; + case DQmode: id = AVR_BUILTIN_COUNTLSLLR; break; + + case UQQmode: id = AVR_BUILTIN_COUNTLSUHR; break; + case UHQmode: id = AVR_BUILTIN_COUNTLSUR; break; + case USQmode: id = AVR_BUILTIN_COUNTLSULR; break; + case UDQmode: id = AVR_BUILTIN_COUNTLSULLR; break; + + case HAmode: id = AVR_BUILTIN_COUNTLSHK; break; + case SAmode: id = AVR_BUILTIN_COUNTLSK; break; + case DAmode: id = AVR_BUILTIN_COUNTLSLK; break; + case TAmode: id = AVR_BUILTIN_COUNTLSLLK; break; + + case UHAmode: id = AVR_BUILTIN_COUNTLSUHK; break; + case USAmode: id = AVR_BUILTIN_COUNTLSUK; break; + case UDAmode: id = AVR_BUILTIN_COUNTLSULK; break; + case UTAmode: id = AVR_BUILTIN_COUNTLSULLK; break; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "countlsfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // countlsfx + } + + return fold; +} + + +/* Implement `REGISTER_TARGET_PRAGMAS'. */ + +void +avr_register_target_pragmas (void) +{ + int i; + + gcc_assert (ADDR_SPACE_GENERIC == ADDR_SPACE_RAM); + + /* Register address spaces. The order must be the same as in the respective + enum from avr.h (or designated initializers must be used in avr.c). */ + + for (i = 0; i < ADDR_SPACE_COUNT; i++) + { + gcc_assert (i == avr_addrspace[i].id); + + if (!ADDR_SPACE_GENERIC_P (i)) + c_register_addr_space (avr_addrspace[i].name, avr_addrspace[i].id); + } + + targetm.resolve_overloaded_builtin = avr_resolve_overloaded_builtin; +} + + +/* Transform LO into uppercase and write the result to UP. + You must provide enough space for UP. Return UP. */ + +static char* +avr_toupper (char *up, const char *lo) +{ + char *up0 = up; + + for (; *lo; lo++, up++) + *up = TOUPPER (*lo); + + *up = '\0'; + + return up0; +} + +/* Worker function for TARGET_CPU_CPP_BUILTINS. 
*/ + +void +avr_cpu_cpp_builtins (struct cpp_reader *pfile) +{ + int i; + + builtin_define_std ("AVR"); + + if (avr_current_arch->macro) + cpp_define_formatted (pfile, "__AVR_ARCH__=%s", avr_current_arch->macro); + if (avr_current_device->macro) + cpp_define (pfile, avr_current_device->macro); + if (AVR_HAVE_RAMPD) cpp_define (pfile, "__AVR_HAVE_RAMPD__"); + if (AVR_HAVE_RAMPX) cpp_define (pfile, "__AVR_HAVE_RAMPX__"); + if (AVR_HAVE_RAMPY) cpp_define (pfile, "__AVR_HAVE_RAMPY__"); + if (AVR_HAVE_RAMPZ) cpp_define (pfile, "__AVR_HAVE_RAMPZ__"); + if (AVR_HAVE_ELPM) cpp_define (pfile, "__AVR_HAVE_ELPM__"); + if (AVR_HAVE_ELPMX) cpp_define (pfile, "__AVR_HAVE_ELPMX__"); + if (AVR_HAVE_MOVW) cpp_define (pfile, "__AVR_HAVE_MOVW__"); + if (AVR_HAVE_LPMX) cpp_define (pfile, "__AVR_HAVE_LPMX__"); + + if (avr_current_arch->asm_only) + cpp_define (pfile, "__AVR_ASM_ONLY__"); + if (AVR_HAVE_MUL) + { + cpp_define (pfile, "__AVR_ENHANCED__"); + cpp_define (pfile, "__AVR_HAVE_MUL__"); + } + if (avr_current_arch->have_jmp_call) + { + cpp_define (pfile, "__AVR_MEGA__"); + cpp_define (pfile, "__AVR_HAVE_JMP_CALL__"); + } + if (AVR_XMEGA) + cpp_define (pfile, "__AVR_XMEGA__"); + if (avr_current_arch->have_eijmp_eicall) + { + cpp_define (pfile, "__AVR_HAVE_EIJMP_EICALL__"); + cpp_define (pfile, "__AVR_3_BYTE_PC__"); + } + else + { + cpp_define (pfile, "__AVR_2_BYTE_PC__"); + } + + if (AVR_HAVE_8BIT_SP) + cpp_define (pfile, "__AVR_HAVE_8BIT_SP__"); + else + cpp_define (pfile, "__AVR_HAVE_16BIT_SP__"); + + if (avr_sp8) + cpp_define (pfile, "__AVR_SP8__"); + + if (AVR_HAVE_SPH) + cpp_define (pfile, "__AVR_HAVE_SPH__"); + + if (TARGET_NO_INTERRUPTS) + cpp_define (pfile, "__NO_INTERRUPTS__"); + + if (avr_current_device->errata_skip) + { + cpp_define (pfile, "__AVR_ERRATA_SKIP__"); + + if (avr_current_arch->have_jmp_call) + cpp_define (pfile, "__AVR_ERRATA_SKIP_JMP_CALL__"); + } + + cpp_define_formatted (pfile, "__AVR_SFR_OFFSET__=0x%x", + avr_current_arch->sfr_offset); + +#ifdef WITH_AVRLIBC + cpp_define (pfile, "__WITH_AVRLIBC__"); +#endif /* WITH_AVRLIBC */ + + /* Define builtin macros so that the user can easily query whether + non-generic address spaces (and which) are supported or not. + This is only supported for C. For C++, a language extension is needed + (as mentioned in ISO/IEC DTR 18037; Annex F.2) which is not + implemented in GCC up to now. */ + + if (!strcmp (lang_hooks.name, "GNU C")) + { + for (i = 0; i < ADDR_SPACE_COUNT; i++) + if (!ADDR_SPACE_GENERIC_P (i) + /* Only supply __FLASH macro if the address space is reasonable + for this target. The address space qualifier itself is still + supported, but using it will throw an error. */ + && avr_addrspace[i].segment < avr_current_device->n_flash) + { + const char *name = avr_addrspace[i].name; + char *Name = (char*) alloca (1 + strlen (name)); + + cpp_define (pfile, avr_toupper (Name, name)); + } + } + + /* Define builtin macros so that the user can easily query whether or + not a specific builtin is available. */ + +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + cpp_define (pfile, "__BUILTIN_AVR_" #NAME); +#include "builtins.def" +#undef DEF_BUILTIN + + /* Builtin macros for the __int24 and __uint24 type. */ + + cpp_define_formatted (pfile, "__INT24_MAX__=8388607%s", + INT_TYPE_SIZE == 8 ? "LL" : "L"); + cpp_define (pfile, "__INT24_MIN__=(-__INT24_MAX__-1)"); + cpp_define_formatted (pfile, "__UINT24_MAX__=16777215%s", + INT_TYPE_SIZE == 8 ? 
"ULL" : "UL"); +} diff --git a/gcc-4.9/gcc/config/avr/avr-devices.c b/gcc-4.9/gcc/config/avr/avr-devices.c new file mode 100644 index 000000000..177f1961f --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-devices.c @@ -0,0 +1,114 @@ +/* Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef IN_GEN_AVR_MMCU_TEXI +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#endif /* IN_GEN_AVR_MMCU_TEXI */ + +/* List of all known AVR MCU architectures. + Order as of enum avr_arch from avr.h. */ + +const avr_arch_t +avr_arch_types[] = +{ + /* unknown device specified */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, NULL, "avr2" }, + /* + A M J LM E E E X R d S S O A + S U M PO L L I M A a t F ff r + M L P MV P P J E M t a R s c + XW M M M G P a r e h + X P A D t t ID */ + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, "1", "avr1" }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, "2", "avr2" }, + { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0x0060, 32, "25", "avr25" }, + { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0x0060, 32, "3", "avr3" }, + { 0, 0, 1, 0, 1, 0, 0, 0, 0, 0x0060, 32, "31", "avr31" }, + { 0, 0, 1, 1, 0, 0, 0, 0, 0, 0x0060, 32, "35", "avr35" }, + { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0x0060, 32, "4", "avr4" }, + { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0x0060, 32, "5", "avr5" }, + { 0, 1, 1, 1, 1, 1, 0, 0, 0, 0x0060, 32, "51", "avr51" }, + { 0, 1, 1, 1, 1, 1, 1, 0, 0, 0x0060, 32, "6", "avr6" }, + + { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0x2000, 0, "102", "avrxmega2" }, + { 0, 1, 1, 1, 1, 1, 0, 1, 0, 0x2000, 0, "104", "avrxmega4" }, + { 0, 1, 1, 1, 1, 1, 0, 1, 1, 0x2000, 0, "105", "avrxmega5" }, + { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0x2000, 0, "106", "avrxmega6" }, + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0x2000, 0, "107", "avrxmega7" } +}; + +const avr_arch_info_t +avr_texinfo[] = +{ + { ARCH_AVR1, + "This ISA is implemented by the minimal AVR core and supported " + "for assembler only." }, + { ARCH_AVR2, + "``Classic'' devices with up to 8@tie{}KiB of program memory." }, + { ARCH_AVR25, + "``Classic'' devices with up to 8@tie{}KiB of program memory and with " + "the @code{MOVW} instruction." }, + { ARCH_AVR3, + "``Classic'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + " program memory." }, + { ARCH_AVR31, + "``Classic'' devices with 128@tie{}KiB of program memory." }, + { ARCH_AVR35, + "``Classic'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + "program memory and with the @code{MOVW} instruction." }, + { ARCH_AVR4, + "``Enhanced'' devices with up to 8@tie{}KiB of program memory." }, + { ARCH_AVR5, + "``Enhanced'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + "program memory." }, + { ARCH_AVR51, + "``Enhanced'' devices with 128@tie{}KiB of program memory." }, + { ARCH_AVR6, + "``Enhanced'' devices with 3-byte PC, i.e.@: with more than 128@tie{}KiB " + "of program memory." 
}, + { ARCH_AVRXMEGA2, + "``XMEGA'' devices with more than 8@tie{}KiB and up to 64@tie{}KiB " + "of program memory." }, + { ARCH_AVRXMEGA4, + "``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB " + "of program memory." }, + { ARCH_AVRXMEGA5, + "``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB " + "of program memory and more than 64@tie{}KiB of RAM." }, + { ARCH_AVRXMEGA6, + "``XMEGA'' devices with more than 128@tie{}KiB of program memory." }, + { ARCH_AVRXMEGA7, + "``XMEGA'' devices with more than 128@tie{}KiB of program memory " + "and more than 64@tie{}KiB of RAM." } +}; + +const avr_mcu_t +avr_mcu_types[] = +{ +#define AVR_MCU(NAME, ARCH, MACRO, SP8, ERR_SKIP, DATA_SEC, N_FLASH, LIBNAME)\ + { NAME, ARCH, MACRO, SP8, ERR_SKIP, DATA_SEC, N_FLASH, LIBNAME }, +#include "avr-mcus.def" +#undef AVR_MCU + /* End of list. */ + { NULL, ARCH_UNKNOWN, NULL, 0, 0, 0, 0, NULL } +}; + diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md new file mode 100644 index 000000000..639810518 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-dimode.md @@ -0,0 +1,479 @@ +;; Machine description for GNU compiler, +;; for Atmel AVR micro controllers. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Georg Lay (avr@gjlay.de) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The purpose of this file is to provide a light-weight DImode +;; implementation for AVR. The trouble with DImode is that tree -> RTL +;; lowering leads to really unpleasant code for operations that don't +;; work byte-wise like NEG, PLUS, MINUS, etc. Defining optabs entries for +;; them won't help because the optab machinery assumes these operations +;; are cheap and does not check if a libgcc implementation is available. +;; +;; The DImode insns are all straight forward -- except movdi. The approach +;; of this implementation is to provide DImode insns without the burden of +;; introducing movdi. +;; +;; The caveat is that if there are insns for some mode, there must also be a +;; respective move insn that describes reloads. Therefore, this +;; implementation uses an accumulator-based model with two hard-coded, +;; accumulator-like registers +;; +;; A[] = reg:DI 18 +;; B[] = reg:DI 10 +;; +;; so that no DImode insn contains pseudos or needs reloading. 
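+
+;; Illustration (added here as an assumption for exposition, not part of
+;; the original comment): for a C function such as
+;;
+;;   long long add64 (long long a, long long b) { return a + b; }
+;;
+;; the "adddi3" expander below copies "a" into accumulator A (r18..r25),
+;; copies "b" into accumulator B (r10..r17), emits "%~call __adddi3",
+;; and finally copies accumulator A back into the pseudo holding the
+;; result, so no DImode pseudo is ever live across the insn and no
+;; DImode reload is needed.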
+ +(define_constants + [(ACC_A 18) + (ACC_B 10)]) + +;; Supported modes that are 8 bytes wide +(define_mode_iterator ALL8 [DI DQ UDQ DA UDA TA UTA]) + +(define_mode_iterator ALL8U [UDQ UDA UTA]) +(define_mode_iterator ALL8S [ DQ DA TA]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Addition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; "adddi3" +;; "adddq3" "addudq3" +;; "addda3" "adduda3" +;; "addta3" "adduta3" +(define_expand "add3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (match_operand:ALL8 1 "general_operand" "") + (match_operand:ALL8 2 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (DImode == mode + && s8_operand (operands[2], VOIDmode)) + { + emit_move_insn (gen_rtx_REG (QImode, REG_X), operands[2]); + emit_insn (gen_adddi3_const8_insn ()); + } + else if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_add3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_add3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "adddi3_insn" +;; "adddq3_insn" "addudq3_insn" +;; "addda3_insn" "adduda3_insn" +;; "addta3_insn" "adduta3_insn" +(define_insn "add3_insn" + [(set (reg:ALL8 ACC_A) + (plus:ALL8 (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __adddi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "adddi3_const8_insn" + [(set (reg:DI ACC_A) + (plus:DI (reg:DI ACC_A) + (sign_extend:DI (reg:QI REG_X))))] + "avr_have_dimode" + "%~call __adddi3_s8" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +;; "adddi3_const_insn" +;; "adddq3_const_insn" "addudq3_const_insn" +;; "addda3_const_insn" "adduda3_const_insn" +;; "addta3_const_insn" "adduta3_const_insn" +(define_insn "add3_const_insn" + [(set (reg:ALL8 ACC_A) + (plus:ALL8 (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn")))] + "avr_have_dimode + && !s8_operand (operands[0], VOIDmode)" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; "subdi3" +;; "subdq3" "subudq3" +;; "subda3" "subuda3" +;; "subta3" "subuta3" +(define_expand "sub3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (match_operand:ALL8 1 "general_operand" "") + (match_operand:ALL8 2 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_sub3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_sub3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "subdi3_insn" +;; "subdq3_insn" "subudq3_insn" +;; "subda3_insn" "subuda3_insn" +;; "subta3_insn" "subuta3_insn" +(define_insn "sub3_insn" + [(set (reg:ALL8 ACC_A) + (minus:ALL8 (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __subdi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "set_czn")]) + +;; "subdi3_const_insn" +;; "subdq3_const_insn" "subudq3_const_insn" +;; "subda3_const_insn" "subuda3_const_insn" +;; "subta3_const_insn" "subuta3_const_insn" 
+(define_insn "sub3_const_insn" + [(set (reg:ALL8 ACC_A) + (minus:ALL8 (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Signed Saturating Addition and Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "3" + [(set (match_operand:ALL8S 0 "general_operand" "") + (ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "") + (match_operand:ALL8S 2 "general_operand" "")))] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "3_insn" + [(set (reg:ALL8S ACC_A) + (ss_addsub:ALL8S (reg:ALL8S ACC_A) + (reg:ALL8S ACC_B)))] + "avr_have_dimode" + "%~call __3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "3_const_insn" + [(set (reg:ALL8S ACC_A) + (ss_addsub:ALL8S (reg:ALL8S ACC_A) + (match_operand:ALL8S 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Unsigned Saturating Addition and Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "3" + [(set (match_operand:ALL8U 0 "general_operand" "") + (us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "") + (match_operand:ALL8U 2 "general_operand" "")))] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "3_insn" + [(set (reg:ALL8U ACC_A) + (us_addsub:ALL8U (reg:ALL8U ACC_A) + (reg:ALL8U ACC_B)))] + "avr_have_dimode" + "%~call __3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "3_const_insn" + [(set (reg:ALL8U ACC_A) + (us_addsub:ALL8U (reg:ALL8U ACC_A) + (match_operand:ALL8U 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Negation +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "negdi2" + [(parallel [(match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (DImode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + emit_insn (gen_negdi2_insn ()); + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "negdi2_insn" + [(set (reg:DI ACC_A) + (neg:DI (reg:DI ACC_A)))] + "avr_have_dimode" + "%~call __negdi2" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Comparison 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "conditional_jump" + [(set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "avr_have_dimode") + +;; "cbranchdi4" +;; "cbranchdq4" "cbranchudq4" +;; "cbranchda4" "cbranchuda4" +;; "cbranchta4" "cbranchuta4" +(define_expand "cbranch4" + [(parallel [(match_operand:ALL8 1 "register_operand" "") + (match_operand:ALL8 2 "nonmemory_operand" "") + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" ""))])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (s8_operand (operands[2], VOIDmode)) + { + emit_move_insn (gen_rtx_REG (QImode, REG_X), operands[2]); + emit_insn (gen_compare_const8_di2 ()); + } + else if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_compare_const_2 (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_compare_2 ()); + } + + emit_jump_insn (gen_conditional_jump (operands[0], operands[3])); + DONE; + }) + +;; "compare_di2" +;; "compare_dq2" "compare_udq2" +;; "compare_da2" "compare_uda2" +;; "compare_ta2" "compare_uta2" +(define_insn "compare_2" + [(set (cc0) + (compare (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __cmpdi2" + [(set_attr "adjust_len" "call") + (set_attr "cc" "compare")]) + +(define_insn "compare_const8_di2" + [(set (cc0) + (compare (reg:DI ACC_A) + (sign_extend:DI (reg:QI REG_X))))] + "avr_have_dimode" + "%~call __cmpdi2_s8" + [(set_attr "adjust_len" "call") + (set_attr "cc" "compare")]) + +;; "compare_const_di2" +;; "compare_const_dq2" "compare_const_udq2" +;; "compare_const_da2" "compare_const_uda2" +;; "compare_const_ta2" "compare_const_uta2" +(define_insn "compare_const_2" + [(set (cc0) + (compare (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn"))) + (clobber (match_scratch:QI 1 "=&d"))] + "avr_have_dimode + && !s8_operand (operands[0], VOIDmode)" + { + return avr_out_compare64 (insn, operands, NULL); + } + [(set_attr "adjust_len" "compare64") + (set_attr "cc" "compare")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Shifts and Rotate +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_code_iterator di_shifts + [ashift ashiftrt lshiftrt rotate]) + +;; Shift functions from libgcc are called without defining these insns, +;; but with them we can describe their reduced register footprint. 
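The shift patterns that follow hand the 64-bit operand to libgcc in the ACC_A register group (r18 upward, per the constants at the top of this file) with the shift count in r16. A minimal C sketch of code that would reach these expanders when 64-bit support (avr_have_dimode) is enabled; the function name is only illustrative:

    unsigned long long
    shift_left (unsigned long long x, unsigned char n)
    {
      /* Goes through the "ashldi3" expander below: x is moved into the
         ACC_A registers, n into r16, and the matching insn then emits
         "%~call __ashldi3".  */
      return x << n;
    }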
+ +;; "ashldi3" "ashrdi3" "lshrdi3" "rotldi3" +;; "ashldq3" "ashrdq3" "lshrdq3" "rotldq3" +;; "ashlda3" "ashrda3" "lshrda3" "rotlda3" +;; "ashlta3" "ashrta3" "lshrta3" "rotlta3" +;; "ashludq3" "ashrudq3" "lshrudq3" "rotludq3" +;; "ashluda3" "ashruda3" "lshruda3" "rotluda3" +;; "ashluta3" "ashruta3" "lshruta3" "rotluta3" +(define_expand "3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "") + (match_operand:QI 2 "general_operand" ""))])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]); + emit_insn (gen_3_insn ()); + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "ashldi3_insn" "ashrdi3_insn" "lshrdi3_insn" "rotldi3_insn" +;; "ashldq3_insn" "ashrdq3_insn" "lshrdq3_insn" "rotldq3_insn" +;; "ashlda3_insn" "ashrda3_insn" "lshrda3_insn" "rotlda3_insn" +;; "ashlta3_insn" "ashrta3_insn" "lshrta3_insn" "rotlta3_insn" +;; "ashludq3_insn" "ashrudq3_insn" "lshrudq3_insn" "rotludq3_insn" +;; "ashluda3_insn" "ashruda3_insn" "lshruda3_insn" "rotluda3_insn" +;; "ashluta3_insn" "ashruta3_insn" "lshruta3_insn" "rotluta3_insn" +(define_insn "3_insn" + [(set (reg:ALL8 ACC_A) + (di_shifts:ALL8 (reg:ALL8 ACC_A) + (reg:QI 16)))] + "avr_have_dimode" + "%~call __di3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +;; "umulsidi3" +;; "mulsidi3" +(define_expand "mulsidi3" + [(parallel [(match_operand:DI 0 "register_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "") + ;; Just to mention the iterator + (clobber (any_extend:SI (match_dup 1)))])] + "avr_have_dimode" + { + emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); + emit_insn (gen_mulsidi3_insn()); + // Use emit_move_insn and not open-coded expand because of missing movdi + emit_move_insn (operands[0], gen_rtx_REG (DImode, ACC_A)); + DONE; + }) + +;; "umulsidi3_insn" +;; "mulsidi3_insn" +(define_insn "mulsidi3_insn" + [(set (reg:DI ACC_A) + (mult:DI (any_extend:DI (reg:SI 18)) + (any_extend:DI (reg:SI 22)))) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z))] + "avr_have_dimode" + "%~call __mulsidi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md new file mode 100644 index 000000000..1652415b1 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -0,0 +1,497 @@ +;; This file contains instructions that support fixed-point operations +;; for Atmel AVR micro controllers. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; Contributed by Sean D'Epagnier (sean@depagnier.com) +;; Georg-Johann Lay (avr@gjlay.de) + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_mode_iterator ALL1Q [QQ UQQ]) +(define_mode_iterator ALL2Q [HQ UHQ]) +(define_mode_iterator ALL2A [HA UHA]) +(define_mode_iterator ALL4A [SA USA]) +(define_mode_iterator ALL2QA [HQ UHQ HA UHA]) +(define_mode_iterator ALL4QA [SQ USQ SA USA]) +(define_mode_iterator ALL124QA [ QQ HQ HA SA SQ + UQQ UHQ UHA USA USQ]) + +(define_mode_iterator ALL2S [HQ HA]) +(define_mode_iterator ALL4S [SA SQ]) +(define_mode_iterator ALL24S [ HQ HA SA SQ]) +(define_mode_iterator ALL124S [ QQ HQ HA SA SQ]) +(define_mode_iterator ALL124U [UQQ UHQ UHA USA USQ]) + +;;; Conversions + +(define_mode_iterator FIXED_A + [QQ UQQ + HQ UHQ HA UHA + SQ USQ SA USA + DQ UDQ DA UDA + TA UTA + QI HI SI DI]) + +;; Same so that be can build cross products + +(define_mode_iterator FIXED_B + [QQ UQQ + HQ UHQ HA UHA + SQ USQ SA USA + DQ UDQ DA UDA + TA UTA + QI HI SI DI]) + +(define_insn "fract2" + [(set (match_operand:FIXED_A 0 "register_operand" "=r") + (fract_convert:FIXED_A + (match_operand:FIXED_B 1 "register_operand" "r")))] + "mode != mode" + { + return avr_out_fract (insn, operands, true, NULL); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "sfract")]) + +(define_insn "fractuns2" + [(set (match_operand:FIXED_A 0 "register_operand" "=r") + (unsigned_fract_convert:FIXED_A + (match_operand:FIXED_B 1 "register_operand" "r")))] + "mode != mode" + { + return avr_out_fract (insn, operands, false, NULL); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "ufract")]) + +;****************************************************************************** +;** Saturated Addition and Subtraction +;****************************************************************************** + +;; Fixme: It would be nice if we could expand the 32-bit versions to a +;; transparent libgcc call if $2 is a REG. Problem is that it is +;; not possible to describe that addition is commutative. +;; And defining register classes/constraintrs for the involved hard +;; registers and let IRA do the work, yields inacceptable bloated code. +;; Thus, we have to live with the up to 11 instructions that are output +;; for these 32-bit saturated operations. + +;; "ssaddqq3" "ssaddhq3" "ssaddha3" "ssaddsq3" "ssaddsa3" +;; "sssubqq3" "sssubhq3" "sssubha3" "sssubsq3" "sssubsa3" +(define_insn "3" + [(set (match_operand:ALL124S 0 "register_operand" "=??d,d") + (ss_addsub:ALL124S (match_operand:ALL124S 1 "register_operand" "0,0") + (match_operand:ALL124S 2 "nonmemory_operand" "r,Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "plus")]) + +;; "usadduqq3" "usadduhq3" "usadduha3" "usaddusq3" "usaddusa3" +;; "ussubuqq3" "ussubuhq3" "ussubuha3" "ussubusq3" "ussubusa3" +(define_insn "3" + [(set (match_operand:ALL124U 0 "register_operand" "=??r,d") + (us_addsub:ALL124U (match_operand:ALL124U 1 "register_operand" "0,0") + (match_operand:ALL124U 2 "nonmemory_operand" "r,Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "plus")]) + +;****************************************************************************** +;** Saturated Negation and Absolute Value +;****************************************************************************** + +;; Fixme: This will always result in 0. Dunno why simplify-rtx.c says +;; "unknown" on how to optimize this. 
libgcc call would be in order, +;; but the performance is *PLAIN* *HORROR* because the optimizers don't +;; manage to optimize out MEMCPY that's sprincled all over fixed-bit.c */ + +(define_expand "usneg2" + [(parallel [(match_operand:ALL124U 0 "register_operand" "") + (match_operand:ALL124U 1 "nonmemory_operand" "")])] + "" + { + emit_move_insn (operands[0], CONST0_RTX (mode)); + DONE; + }) + +(define_insn "ssnegqq2" + [(set (match_operand:QQ 0 "register_operand" "=r") + (ss_neg:QQ (match_operand:QQ 1 "register_operand" "0")))] + "" + "neg %0\;brvc 0f\;dec %0\;0:" + [(set_attr "cc" "clobber") + (set_attr "length" "3")]) + +(define_insn "ssabsqq2" + [(set (match_operand:QQ 0 "register_operand" "=r") + (ss_abs:QQ (match_operand:QQ 1 "register_operand" "0")))] + "" + "sbrc %0,7\;neg %0\;sbrc %0,7\;dec %0" + [(set_attr "cc" "clobber") + (set_attr "length" "4")]) + +;; "ssneghq2" "ssnegha2" "ssnegsq2" "ssnegsa2" +;; "ssabshq2" "ssabsha2" "ssabssq2" "ssabssa2" +(define_expand "2" + [(set (match_dup 2) + (match_operand:ALL24S 1 "register_operand" "")) + (set (match_dup 2) + (ss_abs_neg:ALL24S (match_dup 2))) + (set (match_operand:ALL24S 0 "register_operand" "") + (match_dup 2))] + "" + { + operands[2] = gen_rtx_REG (mode, 26 - GET_MODE_SIZE (mode)); + }) + +;; "*ssneghq2" "*ssnegha2" +;; "*ssabshq2" "*ssabsha2" +(define_insn "*2" + [(set (reg:ALL2S 24) + (ss_abs_neg:ALL2S (reg:ALL2S 24)))] + "" + "%~call ___2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*ssnegsq2" "*ssnegsa2" +;; "*ssabssq2" "*ssabssa2" +(define_insn "*2" + [(set (reg:ALL4S 22) + (ss_abs_neg:ALL4S (reg:ALL4S 22)))] + "" + "%~call ___4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;****************************************************************************** +; mul + +;; "mulqq3" "muluqq3" +(define_expand "mul3" + [(parallel [(match_operand:ALL1Q 0 "register_operand" "") + (match_operand:ALL1Q 1 "register_operand" "") + (match_operand:ALL1Q 2 "register_operand" "")])] + "" + { + emit_insn (AVR_HAVE_MUL + ? 
gen_mul3_enh (operands[0], operands[1], operands[2]) + : gen_mul3_nomul (operands[0], operands[1], operands[2])); + DONE; + }) + +(define_insn "mulqq3_enh" + [(set (match_operand:QQ 0 "register_operand" "=r") + (mult:QQ (match_operand:QQ 1 "register_operand" "a") + (match_operand:QQ 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "fmuls %1,%2\;dec r1\;brvs 0f\;inc r1\;0:\;mov %0,r1\;clr __zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +(define_insn "muluqq3_enh" + [(set (match_operand:UQQ 0 "register_operand" "=r") + (mult:UQQ (match_operand:UQQ 1 "register_operand" "r") + (match_operand:UQQ 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%2\;mov %0,r1\;clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulqq3_nomul" + [(set (reg:QQ 24) + (match_operand:QQ 1 "register_operand" "")) + (set (reg:QQ 25) + (match_operand:QQ 2 "register_operand" "")) + ;; "*mulqq3.call" + (parallel [(set (reg:QQ 23) + (mult:QQ (reg:QQ 24) + (reg:QQ 25))) + (clobber (reg:QI 22)) + (clobber (reg:HI 24))]) + (set (match_operand:QQ 0 "register_operand" "") + (reg:QQ 23))] + "!AVR_HAVE_MUL") + +(define_expand "muluqq3_nomul" + [(set (reg:UQQ 22) + (match_operand:UQQ 1 "register_operand" "")) + (set (reg:UQQ 24) + (match_operand:UQQ 2 "register_operand" "")) + ;; "*umulqihi3.call" + (parallel [(set (reg:HI 24) + (mult:HI (zero_extend:HI (reg:QI 22)) + (zero_extend:HI (reg:QI 24)))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22))]) + (set (match_operand:UQQ 0 "register_operand" "") + (reg:UQQ 25))] + "!AVR_HAVE_MUL") + +(define_insn "*mulqq3.call" + [(set (reg:QQ 23) + (mult:QQ (reg:QQ 24) + (reg:QQ 25))) + (clobber (reg:QI 22)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __mulqq3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; "mulhq3" "muluhq3" +;; "mulha3" "muluha3" +(define_expand "mul3" + [(set (reg:ALL2QA 18) + (match_operand:ALL2QA 1 "register_operand" "")) + (set (reg:ALL2QA 26) + (match_operand:ALL2QA 2 "register_operand" "")) + ;; "*mulhq3.call.enh" + (parallel [(set (reg:ALL2QA 24) + (mult:ALL2QA (reg:ALL2QA 18) + (reg:ALL2QA 26))) + (clobber (reg:HI 22))]) + (set (match_operand:ALL2QA 0 "register_operand" "") + (reg:ALL2QA 24))] + "AVR_HAVE_MUL") + +;; "*mulhq3.call" "*muluhq3.call" +;; "*mulha3.call" "*muluha3.call" +(define_insn "*mul3.call" + [(set (reg:ALL2QA 24) + (mult:ALL2QA (reg:ALL2QA 18) + (reg:ALL2QA 26))) + (clobber (reg:HI 22))] + "AVR_HAVE_MUL" + "%~call __mul3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; On the enhanced core, don't clobber either input and use a separate output + +;; "mulsa3" "mulusa3" +(define_expand "mul3" + [(set (reg:ALL4A 16) + (match_operand:ALL4A 1 "register_operand" "")) + (set (reg:ALL4A 20) + (match_operand:ALL4A 2 "register_operand" "")) + (set (reg:ALL4A 24) + (mult:ALL4A (reg:ALL4A 16) + (reg:ALL4A 20))) + (set (match_operand:ALL4A 0 "register_operand" "") + (reg:ALL4A 24))] + "AVR_HAVE_MUL") + +;; "*mulsa3.call" "*mulusa3.call" +(define_insn "*mul3.call" + [(set (reg:ALL4A 24) + (mult:ALL4A (reg:ALL4A 16) + (reg:ALL4A 20)))] + "AVR_HAVE_MUL" + "%~call __mul3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +; / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +; div + +(define_code_iterator usdiv [udiv div]) + +;; "divqq3" "udivuqq3" +(define_expand "3" + [(set (reg:ALL1Q 25) + (match_operand:ALL1Q 1 "register_operand" "")) + (set (reg:ALL1Q 22) + (match_operand:ALL1Q 2 "register_operand" "")) + 
(parallel [(set (reg:ALL1Q 24) + (usdiv:ALL1Q (reg:ALL1Q 25) + (reg:ALL1Q 22))) + (clobber (reg:QI 25))]) + (set (match_operand:ALL1Q 0 "register_operand" "") + (reg:ALL1Q 24))]) + +;; "*divqq3.call" "*udivuqq3.call" +(define_insn "*3.call" + [(set (reg:ALL1Q 24) + (usdiv:ALL1Q (reg:ALL1Q 25) + (reg:ALL1Q 22))) + (clobber (reg:QI 25))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "divhq3" "udivuhq3" +;; "divha3" "udivuha3" +(define_expand "3" + [(set (reg:ALL2QA 26) + (match_operand:ALL2QA 1 "register_operand" "")) + (set (reg:ALL2QA 22) + (match_operand:ALL2QA 2 "register_operand" "")) + (parallel [(set (reg:ALL2QA 24) + (usdiv:ALL2QA (reg:ALL2QA 26) + (reg:ALL2QA 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_operand:ALL2QA 0 "register_operand" "") + (reg:ALL2QA 24))]) + +;; "*divhq3.call" "*udivuhq3.call" +;; "*divha3.call" "*udivuha3.call" +(define_insn "*3.call" + [(set (reg:ALL2QA 24) + (usdiv:ALL2QA (reg:ALL2QA 26) + (reg:ALL2QA 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Note the first parameter gets passed in already offset by 2 bytes + +;; "divsa3" "udivusa3" +(define_expand "3" + [(set (reg:ALL4A 24) + (match_operand:ALL4A 1 "register_operand" "")) + (set (reg:ALL4A 18) + (match_operand:ALL4A 2 "register_operand" "")) + (parallel [(set (reg:ALL4A 22) + (usdiv:ALL4A (reg:ALL4A 24) + (reg:ALL4A 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_operand:ALL4A 0 "register_operand" "") + (reg:ALL4A 22))]) + +;; "*divsa3.call" "*udivusa3.call" +(define_insn "*3.call" + [(set (reg:ALL4A 22) + (usdiv:ALL4A (reg:ALL4A 24) + (reg:ALL4A 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;****************************************************************************** +;** Rounding +;****************************************************************************** + +;; "roundqq3" "rounduqq3" +;; "roundhq3" "rounduhq3" "roundha3" "rounduha3" +;; "roundsq3" "roundusq3" "roundsa3" "roundusa3" +(define_expand "round3" + [(set (match_dup 4) + (match_operand:ALL124QA 1 "register_operand" "")) + (set (reg:QI 24) + (match_dup 5)) + (parallel [(set (match_dup 3) + (unspec:ALL124QA [(match_dup 4) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (match_dup 4))]) + (set (match_operand:ALL124QA 0 "register_operand" "") + (match_dup 3)) + (use (match_operand:HI 2 "nonmemory_operand" ""))] + "" + { + if (CONST_INT_P (operands[2]) + && !(optimize_size + && 4 == GET_MODE_SIZE (mode))) + { + emit_insn (gen_round3_const (operands[0], operands[1], operands[2])); + DONE; + } + + // Input and output of the libgcc function + const unsigned int regno_in[] = { -1, 22, 22, -1, 18 }; + const unsigned int regno_out[] = { -1, 24, 24, -1, 22 }; + + operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); + operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); + operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0); + // $2 is no more needed, but is referenced for expand. + operands[2] = const0_rtx; + }) + +;; Expand rounding with known rounding points inline so that the addend / mask +;; will be consumed by operation with immediate operands and there is no +;; need for a shift with variable offset. 
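With a known rounding point the addend and the mask are plain compile-time constants. A sketch of the equivalent arithmetic on the integer container of the value (illustrative only; it ignores the overflow and saturation handling the real output routine has to provide):

    #include <stdint.h>

    /* Round a value with FBIT fractional bits to RP fractional bits,
       round-to-nearest, on its 16-bit container (e.g. HQ: FBIT == 15).  */
    static inline int16_t
    round_container (int16_t x, unsigned fbit, unsigned rp)
    {
      int16_t addend = (int16_t) (1u << (fbit - rp - 1));      /* 0x0400 for FBIT=15, RP=4 */
      int16_t mask   = (int16_t) ~((1u << (fbit - rp)) - 1u);  /* 0xF800 for FBIT=15, RP=4 */
      return (int16_t) ((x + addend) & mask);
    }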
+ +;; "roundqq3_const" "rounduqq3_const" +;; "roundhq3_const" "rounduhq3_const" "roundha3_const" "rounduha3_const" +;; "roundsq3_const" "roundusq3_const" "roundsa3_const" "roundusa3_const" +(define_insn "round3_const" + [(set (match_operand:ALL124QA 0 "register_operand" "=d") + (unspec:ALL124QA [(match_operand:ALL124QA 1 "register_operand" "0") + (match_operand:HI 2 "const_int_operand" "n") + (const_int 0)] + UNSPEC_ROUND))] + "" + { + return avr_out_round (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "round")]) + + +;; "*roundqq3.libgcc" "*rounduqq3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL1Q 24) + (unspec:ALL1Q [(reg:ALL1Q 22) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL1Q 22))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*roundhq3.libgcc" "*rounduhq3.libgcc" +;; "*roundha3.libgcc" "*rounduha3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL2QA 24) + (unspec:ALL2QA [(reg:ALL2QA 22) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL2QA 22))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*roundsq3.libgcc" "*roundusq3.libgcc" +;; "*roundsa3.libgcc" "*roundusa3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL4QA 22) + (unspec:ALL4QA [(reg:ALL4QA 18) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL4QA 18))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) diff --git a/gcc-4.9/gcc/config/avr/avr-log.c b/gcc-4.9/gcc/config/avr/avr-log.c new file mode 100644 index 000000000..8e27cec6d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-log.c @@ -0,0 +1,351 @@ +/* Subroutines for log output for Atmel AVR back end. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "print-tree.h" +#include "output.h" +#include "input.h" +#include "function.h" +#include "tm_p.h" +#include "tree-pass.h" /* for current_pass */ + +/* This file supplies some functions for AVR back-end developers + with a printf-like interface. The functions are called through + macros avr_edump or avr_fdump from avr-protos.h: + + avr_edump (const char *fmt, ...); + + avr_fdump (FILE *stream, const char *fmt, ...); + + avr_edump (fmt, ...) is a shortcut for avr_fdump (stderr, fmt, ...) + + == known %-codes == + + b: bool + r: rtx + t: tree + T: tree (brief) + C: enum rtx_code + m: enum machine_mode + R: enum reg_class + L: insn list + H: location_t + + == no arguments == + + A: call abort() + f: current_function_name() + F: caller (via __FUNCTION__) + P: Pass name and number + ?: Print caller, current function and pass info + !: Ditto, but only print if in a pass with static pass number, + else return. 
+ + == same as printf == + + %: % + c: char + s: string + d: int (decimal) + x: int (hex) +*/ + +/* Set according to -mlog= option. */ +avr_log_t avr_log; + +/* The caller as of __FUNCTION__ */ +static const char *avr_log_caller = "?"; + +/* The worker function implementing the %-codes */ +static void avr_log_vadump (FILE*, const char*, va_list); + +/* As we have no variadic macros, avr_edump maps to a call to + avr_log_set_caller_e which saves __FUNCTION__ to avr_log_caller and + returns a function pointer to avr_log_fdump_e. avr_log_fdump_e + gets the printf-like arguments and calls avr_log_vadump, the + worker function. avr_fdump works the same way. */ + +/* Provide avr_log_fdump_e/f so that avr_log_set_caller_e/_f can return + their address. */ + +static int +avr_log_fdump_e (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + avr_log_vadump (stderr, fmt, ap); + va_end (ap); + + return 1; +} + +static int +avr_log_fdump_f (FILE *stream, const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + if (stream) + avr_log_vadump (stream, fmt, ap); + va_end (ap); + + return 1; +} + +/* Macros avr_edump/avr_fdump map to calls of the following two functions, + respectively. You don't need to call them directly. */ + +int (* +avr_log_set_caller_e (const char *caller) + )(const char*, ...) +{ + avr_log_caller = caller; + + return avr_log_fdump_e; +} + +int (* +avr_log_set_caller_f (const char *caller) + )(FILE*, const char*, ...) +{ + avr_log_caller = caller; + + return avr_log_fdump_f; +} + + +/* Worker function implementing the %-codes and forwarding to + respective print/dump function. */ + +static void +avr_log_vadump (FILE *file, const char *fmt, va_list ap) +{ + char bs[3] = {'\\', '?', '\0'}; + + while (*fmt) + { + switch (*fmt++) + { + default: + fputc (*(fmt-1), file); + break; + + case '\\': + bs[1] = *fmt++; + fputs (bs, file); + break; + + case '%': + switch (*fmt++) + { + case '%': + fputc ('%', file); + break; + + case 't': + { + tree t = va_arg (ap, tree); + if (NULL_TREE == t) + fprintf (file, ""); + else + { + if (stderr == file) + debug_tree (t); + else + { + print_node (file, "", t, 0); + putc ('\n', file); + } + } + break; + } + + case 'T': + print_node_brief (file, "", va_arg (ap, tree), 3); + break; + + case 'd': + fprintf (file, "%d", va_arg (ap, int)); + break; + + case 'x': + fprintf (file, "%x", va_arg (ap, int)); + break; + + case 'b': + fprintf (file, "%s", va_arg (ap, int) ? "true" : "false"); + break; + + case 'c': + fputc (va_arg (ap, int), file); + break; + + case 'r': + print_inline_rtx (file, va_arg (ap, rtx), 0); + break; + + case 'L': + { + rtx insn = va_arg (ap, rtx); + + while (insn) + { + print_inline_rtx (file, insn, 0); + fprintf (file, "\n"); + insn = NEXT_INSN (insn); + } + break; + } + + case 'f': + if (cfun && cfun->decl) + fputs (current_function_name(), file); + break; + + case 's': + { + const char *str = va_arg (ap, char*); + fputs (str ? 
str : "(null)", file); + } + break; + + case 'm': + fputs (GET_MODE_NAME ((enum machine_mode) va_arg (ap, int)), + file); + break; + + case 'C': + fputs (rtx_name[va_arg (ap, int)], file); + break; + + case 'R': + fputs (reg_class_names[va_arg (ap, int)], file); + break; + + case 'F': + fputs (avr_log_caller, file); + break; + + case 'H': + { + location_t loc = va_arg (ap, location_t); + + if (BUILTINS_LOCATION == loc) + fprintf (file, ""); + else if (UNKNOWN_LOCATION == loc) + fprintf (file, ""); + else + fprintf (file, "%s:%d", + LOCATION_FILE (loc), LOCATION_LINE (loc)); + + break; + } + + case '!': + if (!current_pass) + return; + /* FALLTHRU */ + + case '?': + avr_log_fdump_f (file, "%F[%f:%P]"); + break; + + case 'P': + if (current_pass) + fprintf (file, "%s(%d)", + current_pass->name, + current_pass->static_pass_number); + else + fprintf (file, "pass=?"); + + break; + + case 'A': + fflush (file); + abort(); + + default: + /* Unknown %-code: Stop printing */ + + fprintf (file, "??? %%%c ???%s\n", *(fmt-1), fmt); + fmt = ""; + + break; + } + break; /* % */ + } + } + + fflush (file); +} + + +/* Called from avr.c:avr_option_override(). + Parse argument of -mlog= and set respective fields in avr_log. */ + +void +avr_log_set_avr_log (void) +{ + bool all = TARGET_ALL_DEBUG != 0; + + if (all || avr_log_details) + { + /* Adding , at beginning and end of string makes searching easier. */ + + char *str = (char*) alloca (3 + strlen (avr_log_details)); + bool info; + + str[0] = ','; + strcat (stpcpy (str+1, avr_log_details), ","); + + all |= NULL != strstr (str, ",all,"); + info = NULL != strstr (str, ",?,"); + + if (info) + fprintf (stderr, "\n-mlog="); + +#define SET_DUMP_DETAIL(S) \ + do { \ + avr_log.S = (all || NULL != strstr (str, "," #S ",")); \ + if (info) \ + fprintf (stderr, #S ","); \ + } while (0) + + SET_DUMP_DETAIL (address_cost); + SET_DUMP_DETAIL (builtin); + SET_DUMP_DETAIL (constraints); + SET_DUMP_DETAIL (legitimate_address_p); + SET_DUMP_DETAIL (legitimize_address); + SET_DUMP_DETAIL (legitimize_reload_address); + SET_DUMP_DETAIL (progmem); + SET_DUMP_DETAIL (rtx_costs); + +#undef SET_DUMP_DETAIL + + if (info) + fprintf (stderr, "?\n\n"); + } +} diff --git a/gcc-4.9/gcc/config/avr/avr-mcus.def b/gcc-4.9/gcc/config/avr/avr-mcus.def new file mode 100644 index 000000000..d068f5e80 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-mcus.def @@ -0,0 +1,323 @@ +/* AVR MCUs. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* List of all known AVR MCU types. If updated, cd to $(builddir)/gcc and run + + $ make avr-mcus + + This will regenerate / update the following source files: + + - $(srcdir)/config/avr/t-multilib + - $(srcdir)/config/avr/avr-tables.opt + - $(srcdir)/doc/avr-mmcu.texi + + After that, rebuild everything and check-in the new sources to the repo. + The device list below has to be kept in sync with AVR-LibC. 
+ + + Before including this file, define a macro: + + AVR_MCU (NAME, ARCH, MACRO, SHORT_SP, ERRATA_SKIP, DATA_SEC, N_FLASH, + LIBRARY_NAME) + + where the arguments are the fields of avr_mcu_t: + + NAME Accept -mmcu= + + ARCH Specifies the multilib variant together with SHORT_SP + + MACRO If NULL, this is a core and not a device. If non-NULL, + supply respective built-in macro. + + SHORT_SP The device / multilib has an 8-bit stack pointer (no SPH). + + ERRATA_SKIP Apply work-around for the "skip 32-bit instruction" + silicon bug: Don't skip 32-bit instrctions. + + DATA_SEC First address of SRAM, used in -Tdata= by the driver. + + N_FLASH Number of 64 KiB flash segments, rounded up. + + LIBRARY_NAME Used by the driver to linke startup code from avr-libc + as of crt.o + + "avr2" must be first for the "0" default to work as intended. */ + +/* Classic, <= 8K. */ +AVR_MCU ("avr2", ARCH_AVR2, NULL, 0, 1, 0x0060, 6, "s8515") +AVR_MCU ("at90s2313", ARCH_AVR2, "__AVR_AT90S2313__", 1, 0, 0x0060, 1, "s2313") +AVR_MCU ("at90s2323", ARCH_AVR2, "__AVR_AT90S2323__", 1, 0, 0x0060, 1, "s2323") +AVR_MCU ("at90s2333", ARCH_AVR2, "__AVR_AT90S2333__", 1, 0, 0x0060, 1, "s2333") +AVR_MCU ("at90s2343", ARCH_AVR2, "__AVR_AT90S2343__", 1, 0, 0x0060, 1, "s2343") +AVR_MCU ("attiny22", ARCH_AVR2, "__AVR_ATtiny22__", 1, 0, 0x0060, 1, "tn22") +AVR_MCU ("attiny26", ARCH_AVR2, "__AVR_ATtiny26__", 1, 0, 0x0060, 1, "tn26") +AVR_MCU ("at90s4414", ARCH_AVR2, "__AVR_AT90S4414__", 0, 0, 0x0060, 1, "s4414") +AVR_MCU ("at90s4433", ARCH_AVR2, "__AVR_AT90S4433__", 1, 0, 0x0060, 1, "s4433") +AVR_MCU ("at90s4434", ARCH_AVR2, "__AVR_AT90S4434__", 0, 0, 0x0060, 1, "s4434") +AVR_MCU ("at90s8515", ARCH_AVR2, "__AVR_AT90S8515__", 0, 1, 0x0060, 1, "s8515") +AVR_MCU ("at90c8534", ARCH_AVR2, "__AVR_AT90C8534__", 0, 0, 0x0060, 1, "c8534") +AVR_MCU ("at90s8535", ARCH_AVR2, "__AVR_AT90S8535__", 0, 0, 0x0060, 1, "s8535") +/* Classic + MOVW, <= 8K. 
*/ +AVR_MCU ("avr25", ARCH_AVR25, NULL, 0, 0, 0x0060, 1, "tn85") +AVR_MCU ("ata6289", ARCH_AVR25, "__AVR_ATA6289__", 0, 0, 0x0100, 1, "a6289") +AVR_MCU ("ata5272", ARCH_AVR25, "__AVR_ATA5272__", 0, 0, 0x0100, 1, "a5272") +AVR_MCU ("attiny13", ARCH_AVR25, "__AVR_ATtiny13__", 1, 0, 0x0060, 1, "tn13") +AVR_MCU ("attiny13a", ARCH_AVR25, "__AVR_ATtiny13A__", 1, 0, 0x0060, 1, "tn13a") +AVR_MCU ("attiny2313", ARCH_AVR25, "__AVR_ATtiny2313__", 1, 0, 0x0060, 1, "tn2313") +AVR_MCU ("attiny2313a", ARCH_AVR25, "__AVR_ATtiny2313A__", 1, 0, 0x0060, 1, "tn2313a") +AVR_MCU ("attiny24", ARCH_AVR25, "__AVR_ATtiny24__", 1, 0, 0x0060, 1, "tn24") +AVR_MCU ("attiny24a", ARCH_AVR25, "__AVR_ATtiny24A__", 1, 0, 0x0060, 1, "tn24a") +AVR_MCU ("attiny4313", ARCH_AVR25, "__AVR_ATtiny4313__", 0, 0, 0x0060, 1, "tn4313") +AVR_MCU ("attiny44", ARCH_AVR25, "__AVR_ATtiny44__", 0, 0, 0x0060, 1, "tn44") +AVR_MCU ("attiny44a", ARCH_AVR25, "__AVR_ATtiny44A__", 0, 0, 0x0060, 1, "tn44a") +AVR_MCU ("attiny84", ARCH_AVR25, "__AVR_ATtiny84__", 0, 0, 0x0060, 1, "tn84") +AVR_MCU ("attiny84a", ARCH_AVR25, "__AVR_ATtiny84A__", 0, 0, 0x0060, 1, "tn84") +AVR_MCU ("attiny25", ARCH_AVR25, "__AVR_ATtiny25__", 1, 0, 0x0060, 1, "tn25") +AVR_MCU ("attiny45", ARCH_AVR25, "__AVR_ATtiny45__", 0, 0, 0x0060, 1, "tn45") +AVR_MCU ("attiny85", ARCH_AVR25, "__AVR_ATtiny85__", 0, 0, 0x0060, 1, "tn85") +AVR_MCU ("attiny261", ARCH_AVR25, "__AVR_ATtiny261__", 1, 0, 0x0060, 1, "tn261") +AVR_MCU ("attiny261a", ARCH_AVR25, "__AVR_ATtiny261A__", 1, 0, 0x0060, 1, "tn261a") +AVR_MCU ("attiny461", ARCH_AVR25, "__AVR_ATtiny461__", 0, 0, 0x0060, 1, "tn461") +AVR_MCU ("attiny461a", ARCH_AVR25, "__AVR_ATtiny461A__", 0, 0, 0x0060, 1, "tn461a") +AVR_MCU ("attiny861", ARCH_AVR25, "__AVR_ATtiny861__", 0, 0, 0x0060, 1, "tn861") +AVR_MCU ("attiny861a", ARCH_AVR25, "__AVR_ATtiny861A__", 0, 0, 0x0060, 1, "tn861a") +AVR_MCU ("attiny43u", ARCH_AVR25, "__AVR_ATtiny43U__", 0, 0, 0x0060, 1, "tn43u") +AVR_MCU ("attiny87", ARCH_AVR25, "__AVR_ATtiny87__", 0, 0, 0x0100, 1, "tn87") +AVR_MCU ("attiny48", ARCH_AVR25, "__AVR_ATtiny48__", 0, 0, 0x0100, 1, "tn48") +AVR_MCU ("attiny88", ARCH_AVR25, "__AVR_ATtiny88__", 0, 0, 0x0100, 1, "tn88") +AVR_MCU ("at86rf401", ARCH_AVR25, "__AVR_AT86RF401__", 0, 0, 0x0060, 1, "86401") +/* Classic, > 8K, <= 64K. */ +AVR_MCU ("avr3", ARCH_AVR3, NULL, 0, 0, 0x0060, 1, "43355") +AVR_MCU ("at43usb355", ARCH_AVR3, "__AVR_AT43USB355__", 0, 0, 0x0060, 1, "43355") +AVR_MCU ("at76c711", ARCH_AVR3, "__AVR_AT76C711__", 0, 0, 0x0060, 1, "76711") +/* Classic, == 128K. */ +AVR_MCU ("avr31", ARCH_AVR31, NULL, 0, 1, 0x0060, 2, "m103") +AVR_MCU ("atmega103", ARCH_AVR31, "__AVR_ATmega103__", 0, 1, 0x0060, 2, "m103") +AVR_MCU ("at43usb320", ARCH_AVR31, "__AVR_AT43USB320__", 0, 0, 0x0060, 2, "43320") +/* Classic + MOVW + JMP/CALL. */ +AVR_MCU ("avr35", ARCH_AVR35, NULL, 0, 0, 0x0100, 1, "usb162") +AVR_MCU ("ata5505", ARCH_AVR35, "__AVR_ATA5505__", 0, 0, 0x0100, 1, "a5505") +AVR_MCU ("at90usb82", ARCH_AVR35, "__AVR_AT90USB82__", 0, 0, 0x0100, 1, "usb82") +AVR_MCU ("at90usb162", ARCH_AVR35, "__AVR_AT90USB162__", 0, 0, 0x0100, 1, "usb162") +AVR_MCU ("atmega8u2", ARCH_AVR35, "__AVR_ATmega8U2__", 0, 0, 0x0100, 1, "m8u2") +AVR_MCU ("atmega16u2", ARCH_AVR35, "__AVR_ATmega16U2__", 0, 0, 0x0100, 1, "m16u2") +AVR_MCU ("atmega32u2", ARCH_AVR35, "__AVR_ATmega32U2__", 0, 0, 0x0100, 1, "m32u2") +AVR_MCU ("attiny167", ARCH_AVR35, "__AVR_ATtiny167__", 0, 0, 0x0100, 1, "tn167") +AVR_MCU ("attiny1634", ARCH_AVR35, "__AVR_ATtiny1634__", 0, 0, 0x0100, 1, "tn1634") +/* Enhanced, <= 8K. 
*/ +AVR_MCU ("avr4", ARCH_AVR4, NULL, 0, 0, 0x0060, 1, "m8") +AVR_MCU ("ata6285", ARCH_AVR4, "__AVR_ATA6285__", 0, 0, 0x0100, 1, "a6285") +AVR_MCU ("ata6286", ARCH_AVR4, "__AVR_ATA6286__", 0, 0, 0x0100, 1, "a6286") +AVR_MCU ("atmega8", ARCH_AVR4, "__AVR_ATmega8__", 0, 0, 0x0060, 1, "m8") +AVR_MCU ("atmega8a", ARCH_AVR4, "__AVR_ATmega8A__", 0, 0, 0x0060, 1, "m8a") +AVR_MCU ("atmega48", ARCH_AVR4, "__AVR_ATmega48__", 0, 0, 0x0100, 1, "m48") +AVR_MCU ("atmega48a", ARCH_AVR4, "__AVR_ATmega48A__", 0, 0, 0x0100, 1, "m48a") +AVR_MCU ("atmega48p", ARCH_AVR4, "__AVR_ATmega48P__", 0, 0, 0x0100, 1, "m48p") +AVR_MCU ("atmega48pa", ARCH_AVR4, "__AVR_ATmega48PA__", 0, 0, 0x0100, 1, "m48pa") +AVR_MCU ("atmega88", ARCH_AVR4, "__AVR_ATmega88__", 0, 0, 0x0100, 1, "m88") +AVR_MCU ("atmega88a", ARCH_AVR4, "__AVR_ATmega88A__", 0, 0, 0x0100, 1, "m88a") +AVR_MCU ("atmega88p", ARCH_AVR4, "__AVR_ATmega88P__", 0, 0, 0x0100, 1, "m88p") +AVR_MCU ("atmega88pa", ARCH_AVR4, "__AVR_ATmega88PA__", 0, 0, 0x0100, 1, "m88pa") +AVR_MCU ("atmega8515", ARCH_AVR4, "__AVR_ATmega8515__", 0, 0, 0x0060, 1, "m8515") +AVR_MCU ("atmega8535", ARCH_AVR4, "__AVR_ATmega8535__", 0, 0, 0x0060, 1, "m8535") +AVR_MCU ("atmega8hva", ARCH_AVR4, "__AVR_ATmega8HVA__", 0, 0, 0x0100, 1, "m8hva") +AVR_MCU ("at90pwm1", ARCH_AVR4, "__AVR_AT90PWM1__", 0, 0, 0x0100, 1, "90pwm1") +AVR_MCU ("at90pwm2", ARCH_AVR4, "__AVR_AT90PWM2__", 0, 0, 0x0100, 1, "90pwm2") +AVR_MCU ("at90pwm2b", ARCH_AVR4, "__AVR_AT90PWM2B__", 0, 0, 0x0100, 1, "90pwm2b") +AVR_MCU ("at90pwm3", ARCH_AVR4, "__AVR_AT90PWM3__", 0, 0, 0x0100, 1, "90pwm3") +AVR_MCU ("at90pwm3b", ARCH_AVR4, "__AVR_AT90PWM3B__", 0, 0, 0x0100, 1, "90pwm3b") +AVR_MCU ("at90pwm81", ARCH_AVR4, "__AVR_AT90PWM81__", 0, 0, 0x0100, 1, "90pwm81") +/* Enhanced, > 8K, <= 64K. */ +AVR_MCU ("avr5", ARCH_AVR5, NULL, 0, 0, 0x0060, 1, "m16") +AVR_MCU ("ata5790", ARCH_AVR5, "__AVR_ATA5790__", 0, 0, 0x0100, 1, "a5790") +AVR_MCU ("ata5790n", ARCH_AVR5, "__AVR_ATA5790N__", 0, 0, 0x0100, 1, "a5790n") +AVR_MCU ("ata5795", ARCH_AVR5, "__AVR_ATA5795__", 0, 0, 0x0100, 1, "a5795") +AVR_MCU ("atmega16", ARCH_AVR5, "__AVR_ATmega16__", 0, 0, 0x0060, 1, "m16") +AVR_MCU ("atmega16a", ARCH_AVR5, "__AVR_ATmega16A__", 0, 0, 0x0060, 1, "m16a") +AVR_MCU ("atmega161", ARCH_AVR5, "__AVR_ATmega161__", 0, 0, 0x0060, 1, "m161") +AVR_MCU ("atmega162", ARCH_AVR5, "__AVR_ATmega162__", 0, 0, 0x0100, 1, "m162") +AVR_MCU ("atmega163", ARCH_AVR5, "__AVR_ATmega163__", 0, 0, 0x0060, 1, "m163") +AVR_MCU ("atmega164a", ARCH_AVR5, "__AVR_ATmega164A__", 0, 0, 0x0100, 1, "m164a") +AVR_MCU ("atmega164p", ARCH_AVR5, "__AVR_ATmega164P__", 0, 0, 0x0100, 1, "m164p") +AVR_MCU ("atmega164pa", ARCH_AVR5, "__AVR_ATmega164PA__", 0, 0, 0x0100, 1, "m164pa") +AVR_MCU ("atmega165", ARCH_AVR5, "__AVR_ATmega165__", 0, 0, 0x0100, 1, "m165") +AVR_MCU ("atmega165a", ARCH_AVR5, "__AVR_ATmega165A__", 0, 0, 0x0100, 1, "m165a") +AVR_MCU ("atmega165p", ARCH_AVR5, "__AVR_ATmega165P__", 0, 0, 0x0100, 1, "m165p") +AVR_MCU ("atmega165pa", ARCH_AVR5, "__AVR_ATmega165PA__", 0, 0, 0x0100, 1, "m165pa") +AVR_MCU ("atmega168", ARCH_AVR5, "__AVR_ATmega168__", 0, 0, 0x0100, 1, "m168") +AVR_MCU ("atmega168a", ARCH_AVR5, "__AVR_ATmega168A__", 0, 0, 0x0100, 1, "m168a") +AVR_MCU ("atmega168p", ARCH_AVR5, "__AVR_ATmega168P__", 0, 0, 0x0100, 1, "m168p") +AVR_MCU ("atmega168pa", ARCH_AVR5, "__AVR_ATmega168PA__", 0, 0, 0x0100, 1, "m168pa") +AVR_MCU ("atmega169", ARCH_AVR5, "__AVR_ATmega169__", 0, 0, 0x0100, 1, "m169") +AVR_MCU ("atmega169a", ARCH_AVR5, "__AVR_ATmega169A__", 0, 0, 0x0100, 1, "m169a") +AVR_MCU 
("atmega169p", ARCH_AVR5, "__AVR_ATmega169P__", 0, 0, 0x0100, 1, "m169p") +AVR_MCU ("atmega169pa", ARCH_AVR5, "__AVR_ATmega169PA__", 0, 0, 0x0100, 1, "m169pa") +AVR_MCU ("atmega16hvb", ARCH_AVR5, "__AVR_ATmega16HVB__", 0, 0, 0x0100, 1, "m16hvb") +AVR_MCU ("atmega16hvbrevb", ARCH_AVR5, "__AVR_ATmega16HVBREVB__", 0, 0, 0x0100, 1, "m16hvbrevb") +AVR_MCU ("atmega16m1", ARCH_AVR5, "__AVR_ATmega16M1__", 0, 0, 0x0100, 1, "m16m1") +AVR_MCU ("atmega16u4", ARCH_AVR5, "__AVR_ATmega16U4__", 0, 0, 0x0100, 1, "m16u4") +AVR_MCU ("atmega26hvg", ARCH_AVR5, "__AVR_ATmega26HVG__", 0, 0, 0x0100, 1, "m26hvg") +AVR_MCU ("atmega32a", ARCH_AVR5, "__AVR_ATmega32A__", 0, 0, 0x0060, 1, "m32a") +AVR_MCU ("atmega32", ARCH_AVR5, "__AVR_ATmega32__", 0, 0, 0x0060, 1, "m32") +AVR_MCU ("atmega323", ARCH_AVR5, "__AVR_ATmega323__", 0, 0, 0x0060, 1, "m323") +AVR_MCU ("atmega324a", ARCH_AVR5, "__AVR_ATmega324A__", 0, 0, 0x0100, 1, "m324a") +AVR_MCU ("atmega324p", ARCH_AVR5, "__AVR_ATmega324P__", 0, 0, 0x0100, 1, "m324p") +AVR_MCU ("atmega324pa", ARCH_AVR5, "__AVR_ATmega324PA__", 0, 0, 0x0100, 1, "m324pa") +AVR_MCU ("atmega325", ARCH_AVR5, "__AVR_ATmega325__", 0, 0, 0x0100, 1, "m325") +AVR_MCU ("atmega325a", ARCH_AVR5, "__AVR_ATmega325A__", 0, 0, 0x0100, 1, "m325a") +AVR_MCU ("atmega325p", ARCH_AVR5, "__AVR_ATmega325P__", 0, 0, 0x0100, 1, "m325p") +AVR_MCU ("atmega3250", ARCH_AVR5, "__AVR_ATmega3250__", 0, 0, 0x0100, 1, "m3250") +AVR_MCU ("atmega3250a", ARCH_AVR5, "__AVR_ATmega3250A__", 0, 0, 0x0100, 1, "m3250a") +AVR_MCU ("atmega3250p", ARCH_AVR5, "__AVR_ATmega3250P__", 0, 0, 0x0100, 1, "m3250p") +AVR_MCU ("atmega3250pa", ARCH_AVR5, "__AVR_ATmega3250PA__", 0, 0, 0x0100, 1, "m3250pa") +AVR_MCU ("atmega328", ARCH_AVR5, "__AVR_ATmega328__", 0, 0, 0x0100, 1, "m328") +AVR_MCU ("atmega328p", ARCH_AVR5, "__AVR_ATmega328P__", 0, 0, 0x0100, 1, "m328p") +AVR_MCU ("atmega329", ARCH_AVR5, "__AVR_ATmega329__", 0, 0, 0x0100, 1, "m329") +AVR_MCU ("atmega329a", ARCH_AVR5, "__AVR_ATmega329A__", 0, 0, 0x0100, 1, "m329a") +AVR_MCU ("atmega329p", ARCH_AVR5, "__AVR_ATmega329P__", 0, 0, 0x0100, 1, "m329p") +AVR_MCU ("atmega329pa", ARCH_AVR5, "__AVR_ATmega329PA__", 0, 0, 0x0100, 1, "m329pa") +AVR_MCU ("atmega3290", ARCH_AVR5, "__AVR_ATmega3290__", 0, 0, 0x0100, 1, "m3290") +AVR_MCU ("atmega3290a", ARCH_AVR5, "__AVR_ATmega3290A__", 0, 0, 0x0100, 1, "m3290a") +AVR_MCU ("atmega3290p", ARCH_AVR5, "__AVR_ATmega3290P__", 0, 0, 0x0100, 1, "m3290p") +AVR_MCU ("atmega3290pa", ARCH_AVR5, "__AVR_ATmega3290PA__", 0, 0, 0x0100, 1, "m3290pa") +AVR_MCU ("atmega32c1", ARCH_AVR5, "__AVR_ATmega32C1__", 0, 0, 0x0100, 1, "m32c1") +AVR_MCU ("atmega32m1", ARCH_AVR5, "__AVR_ATmega32M1__", 0, 0, 0x0100, 1, "m32m1") +AVR_MCU ("atmega32u4", ARCH_AVR5, "__AVR_ATmega32U4__", 0, 0, 0x0100, 1, "m32u4") +AVR_MCU ("atmega32u6", ARCH_AVR5, "__AVR_ATmega32U6__", 0, 0, 0x0100, 1, "m32u6") +AVR_MCU ("atmega406", ARCH_AVR5, "__AVR_ATmega406__", 0, 0, 0x0100, 1, "m406") +AVR_MCU ("atmega64", ARCH_AVR5, "__AVR_ATmega64__", 0, 0, 0x0100, 1, "m64") +AVR_MCU ("atmega64a", ARCH_AVR5, "__AVR_ATmega64A__", 0, 0, 0x0100, 1, "m64a") +AVR_MCU ("atmega640", ARCH_AVR5, "__AVR_ATmega640__", 0, 0, 0x0200, 1, "m640") +AVR_MCU ("atmega644", ARCH_AVR5, "__AVR_ATmega644__", 0, 0, 0x0100, 1, "m644") +AVR_MCU ("atmega644a", ARCH_AVR5, "__AVR_ATmega644A__", 0, 0, 0x0100, 1, "m644a") +AVR_MCU ("atmega644p", ARCH_AVR5, "__AVR_ATmega644P__", 0, 0, 0x0100, 1, "m644p") +AVR_MCU ("atmega644pa", ARCH_AVR5, "__AVR_ATmega644PA__", 0, 0, 0x0100, 1, "m644pa") +AVR_MCU ("atmega645", ARCH_AVR5, "__AVR_ATmega645__", 0, 
0, 0x0100, 1, "m645") +AVR_MCU ("atmega645a", ARCH_AVR5, "__AVR_ATmega645A__", 0, 0, 0x0100, 1, "m645a") +AVR_MCU ("atmega645p", ARCH_AVR5, "__AVR_ATmega645P__", 0, 0, 0x0100, 1, "m645p") +AVR_MCU ("atmega6450", ARCH_AVR5, "__AVR_ATmega6450__", 0, 0, 0x0100, 1, "m6450") +AVR_MCU ("atmega6450a", ARCH_AVR5, "__AVR_ATmega6450A__", 0, 0, 0x0100, 1, "m6450a") +AVR_MCU ("atmega6450p", ARCH_AVR5, "__AVR_ATmega6450P__", 0, 0, 0x0100, 1, "m6450p") +AVR_MCU ("atmega649", ARCH_AVR5, "__AVR_ATmega649__", 0, 0, 0x0100, 1, "m649") +AVR_MCU ("atmega649a", ARCH_AVR5, "__AVR_ATmega649A__", 0, 0, 0x0100, 1, "m649a") +AVR_MCU ("atmega649p", ARCH_AVR5, "__AVR_ATmega649P__", 0, 0, 0x0100, 1, "m649p") +AVR_MCU ("atmega6490", ARCH_AVR5, "__AVR_ATmega6490__", 0, 0, 0x0100, 1, "m6490") +AVR_MCU ("atmega16hva", ARCH_AVR5, "__AVR_ATmega16HVA__", 0, 0, 0x0100, 1, "m16hva") +AVR_MCU ("atmega16hva2", ARCH_AVR5, "__AVR_ATmega16HVA2__", 0, 0, 0x0100, 1, "m16hva2") +AVR_MCU ("atmega32hvb", ARCH_AVR5, "__AVR_ATmega32HVB__", 0, 0, 0x0100, 1, "m32hvb") +AVR_MCU ("atmega6490a", ARCH_AVR5, "__AVR_ATmega6490A__", 0, 0, 0x0100, 1, "m6490a") +AVR_MCU ("atmega6490p", ARCH_AVR5, "__AVR_ATmega6490P__", 0, 0, 0x0100, 1, "m6490p") +AVR_MCU ("atmega64c1", ARCH_AVR5, "__AVR_ATmega64C1__", 0, 0, 0x0100, 1, "m64c1") +AVR_MCU ("atmega64m1", ARCH_AVR5, "__AVR_ATmega64M1__", 0, 0, 0x0100, 1, "m64m1") +AVR_MCU ("atmega64hve", ARCH_AVR5, "__AVR_ATmega64HVE__", 0, 0, 0x0100, 1, "m64hve") +AVR_MCU ("atmega64rfa2", ARCH_AVR5, "__AVR_ATmega64RFA2__", 0, 0, 0x0200, 1, "m64rfa2") +AVR_MCU ("atmega64rfr2", ARCH_AVR5, "__AVR_ATmega64RFR2__", 0, 0, 0x0200, 1, "m64rfr2") +AVR_MCU ("atmega32hvbrevb", ARCH_AVR5, "__AVR_ATmega32HVBREVB__", 0, 0, 0x0100, 1, "m32hvbrevb") +AVR_MCU ("atmega48hvf", ARCH_AVR5, "__AVR_ATmega48HVF__", 0, 0, 0x0100, 1, "m48hvf") +AVR_MCU ("at90can32", ARCH_AVR5, "__AVR_AT90CAN32__", 0, 0, 0x0100, 1, "can32") +AVR_MCU ("at90can64", ARCH_AVR5, "__AVR_AT90CAN64__", 0, 0, 0x0100, 1, "can64") +AVR_MCU ("at90pwm161", ARCH_AVR5, "__AVR_AT90PWM161__", 0, 0, 0x0100, 1, "90pwm161") +AVR_MCU ("at90pwm216", ARCH_AVR5, "__AVR_AT90PWM216__", 0, 0, 0x0100, 1, "90pwm216") +AVR_MCU ("at90pwm316", ARCH_AVR5, "__AVR_AT90PWM316__", 0, 0, 0x0100, 1, "90pwm316") +AVR_MCU ("at90scr100", ARCH_AVR5, "__AVR_AT90SCR100__", 0, 0, 0x0100, 1, "90scr100") +AVR_MCU ("at90usb646", ARCH_AVR5, "__AVR_AT90USB646__", 0, 0, 0x0100, 1, "usb646") +AVR_MCU ("at90usb647", ARCH_AVR5, "__AVR_AT90USB647__", 0, 0, 0x0100, 1, "usb647") +AVR_MCU ("at94k", ARCH_AVR5, "__AVR_AT94K__", 0, 0, 0x0060, 1, "at94k") +AVR_MCU ("m3000", ARCH_AVR5, "__AVR_M3000__", 0, 0, 0x1000, 1, "m3000") +/* Enhanced, == 128K. 
*/ +AVR_MCU ("avr51", ARCH_AVR51, NULL, 0, 0, 0x0100, 2, "m128") +AVR_MCU ("atmega128", ARCH_AVR51, "__AVR_ATmega128__", 0, 0, 0x0100, 2, "m128") +AVR_MCU ("atmega128a", ARCH_AVR51, "__AVR_ATmega128A__", 0, 0, 0x0100, 2, "m128a") +AVR_MCU ("atmega1280", ARCH_AVR51, "__AVR_ATmega1280__", 0, 0, 0x0200, 2, "m1280") +AVR_MCU ("atmega1281", ARCH_AVR51, "__AVR_ATmega1281__", 0, 0, 0x0200, 2, "m1281") +AVR_MCU ("atmega1284", ARCH_AVR51, "__AVR_ATmega1284__", 0, 0, 0x0100, 2, "m1284") +AVR_MCU ("atmega1284p", ARCH_AVR51, "__AVR_ATmega1284P__", 0, 0, 0x0100, 2, "m1284p") +AVR_MCU ("atmega128rfa1", ARCH_AVR51, "__AVR_ATmega128RFA1__", 0, 0, 0x0200, 2, "m128rfa1") +AVR_MCU ("at90can128", ARCH_AVR51, "__AVR_AT90CAN128__", 0, 0, 0x0100, 2, "can128") +AVR_MCU ("at90usb1286", ARCH_AVR51, "__AVR_AT90USB1286__", 0, 0, 0x0100, 2, "usb1286") +AVR_MCU ("at90usb1287", ARCH_AVR51, "__AVR_AT90USB1287__", 0, 0, 0x0100, 2, "usb1287") +/* 3-Byte PC. */ +AVR_MCU ("avr6", ARCH_AVR6, NULL, 0, 0, 0x0200, 4, "m2561") +AVR_MCU ("atmega2560", ARCH_AVR6, "__AVR_ATmega2560__", 0, 0, 0x0200, 4, "m2560") +AVR_MCU ("atmega2561", ARCH_AVR6, "__AVR_ATmega2561__", 0, 0, 0x0200, 4, "m2561") +/* Xmega, 16K <= Flash < 64K, RAM <= 64K */ +AVR_MCU ("avrxmega2", ARCH_AVRXMEGA2, NULL, 0, 0, 0x2000, 1, "x32a4") +AVR_MCU ("atxmega16a4", ARCH_AVRXMEGA2, "__AVR_ATxmega16A4__", 0, 0, 0x2000, 1, "x16a4") +AVR_MCU ("atxmega16d4", ARCH_AVRXMEGA2, "__AVR_ATxmega16D4__", 0, 0, 0x2000, 1, "x16d4") +AVR_MCU ("atxmega32a4", ARCH_AVRXMEGA2, "__AVR_ATxmega32A4__", 0, 0, 0x2000, 1, "x32a4") +AVR_MCU ("atxmega32d4", ARCH_AVRXMEGA2, "__AVR_ATxmega32D4__", 0, 0, 0x2000, 1, "x32d4") +AVR_MCU ("atxmega32x1", ARCH_AVRXMEGA2, "__AVR_ATxmega32X1__", 0, 0, 0x2000, 1, "x32x1") +AVR_MCU ("atmxt112sl", ARCH_AVRXMEGA2, "__AVR_ATMXT112SL__", 0, 0, 0x2000, 1, "mxt112sl") +AVR_MCU ("atmxt224", ARCH_AVRXMEGA2, "__AVR_ATMXT224__", 0, 0, 0x2000, 1, "mxt224") +AVR_MCU ("atmxt224e", ARCH_AVRXMEGA2, "__AVR_ATMXT224E__", 0, 0, 0x2000, 1, "mxt224e") +AVR_MCU ("atmxt336s", ARCH_AVRXMEGA2, "__AVR_ATMXT336S__", 0, 0, 0x2000, 1, "mxt336s") +AVR_MCU ("atxmega16a4u", ARCH_AVRXMEGA2, "__AVR_ATxmega16A4U__", 0, 0, 0x2000, 1, "x16a4u") +AVR_MCU ("atxmega16c4", ARCH_AVRXMEGA2, "__AVR_ATxmega16C4__", 0, 0, 0x2000, 1, "x16c4") +AVR_MCU ("atxmega32a4u", ARCH_AVRXMEGA2, "__AVR_ATxmega32A4U__", 0, 0, 0x2000, 1, "x32a4u") +AVR_MCU ("atxmega32c4", ARCH_AVRXMEGA2, "__AVR_ATxmega32C4__", 0, 0, 0x2000, 1, "x32c4") +AVR_MCU ("atxmega32e5", ARCH_AVRXMEGA2, "__AVR_ATxmega32E5__", 0, 0, 0x2000, 1, "x32e5") +/* Xmega, 64K < Flash <= 128K, RAM <= 64K */ +AVR_MCU ("avrxmega4", ARCH_AVRXMEGA4, NULL, 0, 0, 0x2000, 2, "x64a4") +AVR_MCU ("atxmega64a3", ARCH_AVRXMEGA4, "__AVR_ATxmega64A3__", 0, 0, 0x2000, 2, "x64a3") +AVR_MCU ("atxmega64d3", ARCH_AVRXMEGA4, "__AVR_ATxmega64D3__", 0, 0, 0x2000, 2, "x64d3") +AVR_MCU ("atxmega64a3u", ARCH_AVRXMEGA4, "__AVR_ATxmega64A3U__", 0, 0, 0x2000, 2, "x64a3u") +AVR_MCU ("atxmega64a4u", ARCH_AVRXMEGA4, "__AVR_ATxmega64A4U__", 0, 0, 0x2000, 2, "x64a4u") +AVR_MCU ("atxmega64b1", ARCH_AVRXMEGA4, "__AVR_ATxmega64B1__", 0, 0, 0x2000, 2, "x64b1") +AVR_MCU ("atxmega64b3", ARCH_AVRXMEGA4, "__AVR_ATxmega64B3__", 0, 0, 0x2000, 2, "x64b3") +AVR_MCU ("atxmega64c3", ARCH_AVRXMEGA4, "__AVR_ATxmega64C3__", 0, 0, 0x2000, 2, "x64c3") +AVR_MCU ("atxmega64d4", ARCH_AVRXMEGA4, "__AVR_ATxmega64D4__", 0, 0, 0x2000, 2, "x64d4") +/* Xmega, 64K < Flash <= 128K, RAM > 64K */ +AVR_MCU ("avrxmega5", ARCH_AVRXMEGA5, NULL, 0, 0, 0x2000, 2, "x64a1") +AVR_MCU ("atxmega64a1", ARCH_AVRXMEGA5, 
"__AVR_ATxmega64A1__", 0, 0, 0x2000, 2, "x64a1") +AVR_MCU ("atxmega64a1u", ARCH_AVRXMEGA5, "__AVR_ATxmega64A1U__", 0, 0, 0x2000, 2, "x64a1u") +/* Xmega, 128K < Flash, RAM <= 64K */ +AVR_MCU ("avrxmega6", ARCH_AVRXMEGA6, NULL, 0, 0, 0x2000, 6, "x128a3") +AVR_MCU ("atxmega128a3", ARCH_AVRXMEGA6, "__AVR_ATxmega128A3__", 0, 0, 0x2000, 3, "x128a3") +AVR_MCU ("atxmega128d3", ARCH_AVRXMEGA6, "__AVR_ATxmega128D3__", 0, 0, 0x2000, 3, "x128d3") +AVR_MCU ("atxmega192a3", ARCH_AVRXMEGA6, "__AVR_ATxmega192A3__", 0, 0, 0x2000, 4, "x192a3") +AVR_MCU ("atxmega192d3", ARCH_AVRXMEGA6, "__AVR_ATxmega192D3__", 0, 0, 0x2000, 4, "x192d3") +AVR_MCU ("atxmega256a3", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3__", 0, 0, 0x2000, 5, "x256a3") +AVR_MCU ("atxmega256a3b", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3B__", 0, 0, 0x2000, 5, "x256a3b") +AVR_MCU ("atxmega256a3bu", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3BU__", 0, 0, 0x2000, 5, "x256a3bu") +AVR_MCU ("atxmega256d3", ARCH_AVRXMEGA6, "__AVR_ATxmega256D3__", 0, 0, 0x2000, 5, "x256d3") +AVR_MCU ("atxmega128a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega128A3U__", 0, 0, 0x2000, 3, "x128a3u") +AVR_MCU ("atxmega128b1", ARCH_AVRXMEGA6, "__AVR_ATxmega128B1__", 0, 0, 0x2000, 3, "x128b1") +AVR_MCU ("atxmega128b3", ARCH_AVRXMEGA6, "__AVR_ATxmega128B3__", 0, 0, 0x2000, 3, "x128b3") +AVR_MCU ("atxmega128c3", ARCH_AVRXMEGA6, "__AVR_ATxmega128C3__", 0, 0, 0x2000, 3, "x128c3") +AVR_MCU ("atxmega128d4", ARCH_AVRXMEGA6, "__AVR_ATxmega128D4__", 0, 0, 0x2000, 3, "x128d4") +AVR_MCU ("atmxt540s", ARCH_AVRXMEGA6, "__AVR_ATMXT540S__", 0, 0, 0x2000, 2, "mxt540s") +AVR_MCU ("atmxt540sreva", ARCH_AVRXMEGA6, "__AVR_ATMXT540SREVA__", 0, 0, 0x2000, 2, "mxt540sreva") +AVR_MCU ("atxmega192a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega192A3U__", 0, 0, 0x2000, 4, "x192a3u") +AVR_MCU ("atxmega192c3", ARCH_AVRXMEGA6, "__AVR_ATxmega192C3__", 0, 0, 0x2000, 4, "x192c3") +AVR_MCU ("atxmega256a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3U__", 0, 0, 0x2000, 5, "x256a3u") +AVR_MCU ("atxmega256c3", ARCH_AVRXMEGA6, "__AVR_ATxmega256C3__", 0, 0, 0x2000, 5, "x256c3") +AVR_MCU ("atxmega384c3", ARCH_AVRXMEGA6, "__AVR_ATxmega384C3__", 0, 0, 0x2000, 6, "x384c3") +AVR_MCU ("atxmega384d3", ARCH_AVRXMEGA6, "__AVR_ATxmega384D3__", 0, 0, 0x2000, 6, "x384d3") +/* Xmega, 128K < Flash, RAM > 64K RAM. */ +AVR_MCU ("avrxmega7", ARCH_AVRXMEGA7, NULL, 0, 0, 0x2000, 3, "x128a1") +AVR_MCU ("atxmega128a1", ARCH_AVRXMEGA7, "__AVR_ATxmega128A1__", 0, 0, 0x2000, 3, "x128a1") +AVR_MCU ("atxmega128a1u", ARCH_AVRXMEGA7, "__AVR_ATxmega128A1U__", 0, 0, 0x2000, 3, "x128a1u") +AVR_MCU ("atxmega128a4u", ARCH_AVRXMEGA7, "__AVR_ATxmega128A4U__", 0, 0, 0x2000, 3, "x128a4u") +/* Assembler only. */ +AVR_MCU ("avr1", ARCH_AVR1, NULL, 0, 0, 0x0060, 1, "s1200") +AVR_MCU ("at90s1200", ARCH_AVR1, "__AVR_AT90S1200__", 0, 0, 0x0060, 1, "s1200") +AVR_MCU ("attiny11", ARCH_AVR1, "__AVR_ATtiny11__", 0, 0, 0x0060, 1, "tn11") +AVR_MCU ("attiny12", ARCH_AVR1, "__AVR_ATtiny12__", 0, 0, 0x0060, 1, "tn12") +AVR_MCU ("attiny15", ARCH_AVR1, "__AVR_ATtiny15__", 0, 0, 0x0060, 1, "tn15") +AVR_MCU ("attiny28", ARCH_AVR1, "__AVR_ATtiny28__", 0, 0, 0x0060, 1, "tn28") diff --git a/gcc-4.9/gcc/config/avr/avr-modes.def b/gcc-4.9/gcc/config/avr/avr-modes.def new file mode 100644 index 000000000..7d380b068 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-modes.def @@ -0,0 +1,33 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +FRACTIONAL_INT_MODE (PSI, 24, 3); + +/* Make TA and UTA 64 bits wide. + 128 bit wide modes would be insane on a 8-bit machine. + This needs special treatment in avr.c and avr-lib.h. */ + +ADJUST_BYTESIZE (TA, 8); +ADJUST_ALIGNMENT (TA, 1); +ADJUST_IBIT (TA, 16); +ADJUST_FBIT (TA, 47); + +ADJUST_BYTESIZE (UTA, 8); +ADJUST_ALIGNMENT (UTA, 1); +ADJUST_IBIT (UTA, 16); +ADJUST_FBIT (UTA, 48); diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h new file mode 100644 index 000000000..c5ce78429 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-protos.h @@ -0,0 +1,164 @@ +/* Prototypes for exported functions defined in avr.c + + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + + +extern int avr_function_arg_regno_p (int r); +extern void avr_cpu_cpp_builtins (struct cpp_reader * pfile); +extern enum reg_class avr_regno_reg_class (int r); +extern void asm_globalize_label (FILE *file, const char *name); +extern void avr_adjust_reg_alloc_order (void); +extern int avr_initial_elimination_offset (int from, int to); +extern int avr_simple_epilogue (void); +extern int avr_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx avr_return_addr_rtx (int count, rtx tem); +extern void avr_register_target_pragmas (void); +extern void avr_init_expanders (void); + +#ifdef TREE_CODE +extern void avr_asm_output_aligned_decl_common (FILE*, const_tree, const char*, unsigned HOST_WIDE_INT, unsigned int, bool); +extern void asm_output_external (FILE *file, tree decl, char *name); +extern int avr_progmem_p (tree decl, tree attributes); + +#ifdef RTX_CODE /* inside TREE_CODE */ +extern void avr_init_cumulative_args (CUMULATIVE_ARGS*, tree, rtx, tree); +#endif /* RTX_CODE inside TREE_CODE */ + +#endif /* TREE_CODE */ + +#ifdef RTX_CODE +extern int avr_hard_regno_call_part_clobbered (unsigned, enum machine_mode); +extern const char *output_movqi (rtx insn, rtx operands[], int *l); +extern const char *output_movhi (rtx insn, rtx operands[], int *l); +extern const char *output_movsisf (rtx insn, rtx operands[], int *l); +extern const char *avr_out_tstsi (rtx, rtx*, int*); +extern const char *avr_out_tsthi (rtx, rtx*, int*); +extern const char *avr_out_tstpsi (rtx, rtx*, int*); +extern const char *avr_out_compare (rtx, rtx*, int*); +extern const char *avr_out_compare64 (rtx, rtx*, int*); +extern const char *ret_cond_branch (rtx x, int len, int reverse); +extern const char *avr_out_movpsi (rtx, rtx*, int*); + +extern const char *ashlqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *ashrqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *lshrqi3_out (rtx insn, rtx operands[], int *len); +extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *avr_out_ashlpsi3 (rtx, rtx*, int*); +extern const char *avr_out_ashrpsi3 (rtx, rtx*, int*); +extern const char *avr_out_lshrpsi3 (rtx, rtx*, int*); + +extern bool avr_rotate_bytes (rtx operands[]); + +extern const char* avr_out_fract (rtx, rtx[], bool, int*); +extern rtx avr_to_int_mode (rtx); + +extern void avr_expand_prologue (void); +extern void avr_expand_epilogue (bool); +extern bool avr_emit_movmemhi (rtx*); +extern int avr_epilogue_uses (int regno); +extern int avr_starting_frame_offset (void); + +extern void avr_output_addr_vec_elt (FILE *stream, int value); +extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]); +extern const char* avr_out_bitop (rtx, rtx*, int*); +extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); +extern const char* avr_out_round (rtx, rtx*, int* =NULL); +extern const char* avr_out_addto_sp (rtx*, int*); +extern const char* avr_out_xload (rtx, rtx*, int*); +extern const char* avr_out_movmem (rtx, rtx*, int*); +extern const char* avr_out_insert_bits (rtx*, int*); +extern bool avr_popcount_each_byte (rtx, int, int); +extern bool avr_has_nibble_0xf (rtx); + +extern 
int extra_constraint_Q (rtx x); +extern int avr_adjust_insn_length (rtx insn, int len); +extern const char* output_reload_inhi (rtx*, rtx, int*); +extern const char* output_reload_insisf (rtx*, rtx, int*); +extern const char* avr_out_reload_inpsi (rtx*, rtx, int*); +extern const char* avr_out_lpm (rtx, rtx*, int*); +extern void avr_notice_update_cc (rtx body, rtx insn); +extern int reg_unused_after (rtx insn, rtx reg); +extern int _reg_unused_after (rtx insn, rtx reg); +extern int avr_jump_mode (rtx x, rtx insn); +extern int test_hard_reg_class (enum reg_class rclass, rtx x); +extern int jump_over_one_insn_p (rtx insn, rtx dest); + +extern int avr_hard_regno_mode_ok (int regno, enum machine_mode mode); +extern void avr_final_prescan_insn (rtx insn, rtx *operand, int num_operands); +extern int avr_simplify_comparison_p (enum machine_mode mode, + RTX_CODE op, rtx x); +extern RTX_CODE avr_normalize_condition (RTX_CODE condition); +extern void out_shift_with_cnt (const char *templ, rtx insn, + rtx operands[], int *len, int t_len); +extern enum reg_class avr_mode_code_base_reg_class (enum machine_mode, addr_space_t, RTX_CODE, RTX_CODE); +extern bool avr_regno_mode_code_ok_for_base_p (int, enum machine_mode, addr_space_t, RTX_CODE, RTX_CODE); +extern rtx avr_incoming_return_addr_rtx (void); +extern rtx avr_legitimize_reload_address (rtx*, enum machine_mode, int, int, int, int, rtx (*)(rtx,int)); +extern bool avr_mem_flash_p (rtx); +extern bool avr_mem_memx_p (rtx); +extern bool avr_load_libgcc_p (rtx); +extern bool avr_xload_libgcc_p (enum machine_mode); + +extern rtx lpm_reg_rtx; +extern rtx lpm_addr_reg_rtx; +extern rtx tmp_reg_rtx; +extern rtx zero_reg_rtx; +extern rtx all_regs_rtx[32]; +extern rtx rampz_rtx; + +#endif /* RTX_CODE */ + +#ifdef REAL_VALUE_TYPE +extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n); +#endif + +extern bool avr_have_dimode; + +/* From avr-log.c */ + +#define avr_edump (avr_log_set_caller_e (__FUNCTION__)) +#define avr_fdump (avr_log_set_caller_f (__FUNCTION__)) + +extern int (*avr_log_set_caller_e (const char*))(const char*, ...); +extern int (*avr_log_set_caller_f (const char*))(FILE*, const char*, ...); + +extern void avr_log_set_avr_log (void); + +typedef struct +{ + unsigned address_cost :1; + unsigned builtin :1; + unsigned constraints :1; + unsigned legitimate_address_p :1; + unsigned legitimize_address :1; + unsigned legitimize_reload_address :1; + unsigned progmem :1; + unsigned rtx_costs :1; +} avr_log_t; + +extern avr_log_t avr_log; diff --git a/gcc-4.9/gcc/config/avr/avr-stdint.h b/gcc-4.9/gcc/config/avr/avr-stdint.h new file mode 100644 index 000000000..3ecc26895 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-stdint.h @@ -0,0 +1,66 @@ +/* Definitions for types on systems using newlib. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +/* + The intention of this file is to supply definitions that work with + avr-gcc's -mint8 that sets int to an 8-bit type. + + This file is intended to yield the same results as newlib-stdint.h, + but there are some differences to newlib-stdint.h: + + - AVR is an 8-bit architecture that cannot access 16-bit values + atomically, this SIG_ATOMIC_TYPE is "char". + + - For the same reason, [u]int_fast8_t is defined as 8-bit type. + +*/ + +#define SIG_ATOMIC_TYPE "char" + +#define INT8_TYPE "signed char" +#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "int" : "long int") +#define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int") +#define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0) +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : "long unsigned int") +#define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int") +#define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0) + +#define INT_LEAST8_TYPE INT8_TYPE +#define INT_LEAST16_TYPE INT16_TYPE +#define INT_LEAST32_TYPE INT32_TYPE +#define INT_LEAST64_TYPE INT64_TYPE +#define UINT_LEAST8_TYPE UINT8_TYPE +#define UINT_LEAST16_TYPE UINT16_TYPE +#define UINT_LEAST32_TYPE UINT32_TYPE +#define UINT_LEAST64_TYPE UINT64_TYPE + +#define INT_FAST8_TYPE INT8_TYPE +#define INT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "int" : INT16_TYPE) +#define INT_FAST32_TYPE INT32_TYPE +#define INT_FAST64_TYPE INT64_TYPE +#define UINT_FAST8_TYPE UINT8_TYPE +#define UINT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : UINT16_TYPE) +#define UINT_FAST32_TYPE UINT32_TYPE +#define UINT_FAST64_TYPE UINT64_TYPE + +#define INTPTR_TYPE PTRDIFF_TYPE +#ifndef UINTPTR_TYPE +#define UINTPTR_TYPE SIZE_TYPE +#endif diff --git a/gcc-4.9/gcc/config/avr/avr-tables.opt b/gcc-4.9/gcc/config/avr/avr-tables.opt new file mode 100644 index 000000000..b5c6d8290 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-tables.opt @@ -0,0 +1,766 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from avr-mcus.def. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
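For orientation only (this sketch is not part of the checked-in file): how the INT_TYPE_SIZE conditionals in avr-stdint.h above resolve, comparing the default 16-bit int with the 8-bit int selected by -mint8. The helper names below are invented for the illustration.

  /* Illustrative sketch: mirrors the INT16_TYPE / INT32_TYPE selection in
     avr-stdint.h; int_type_size stands in for the target's INT_TYPE_SIZE. */
  #include <stdio.h>

  static const char *
  int16_type (int int_type_size)
  {
    return int_type_size == 16 ? "int" : "long int";
  }

  static const char *
  int32_type (int int_type_size)
  {
    return int_type_size == 16 ? "long int" : "long long int";
  }

  int
  main (void)
  {
    printf ("default (16-bit int): int16_t = %s, int32_t = %s\n",
            int16_type (16), int32_type (16));
    printf ("-mint8  (8-bit int) : int16_t = %s, int32_t = %s\n",
            int16_type (8), int32_type (8));
    return 0;
  }

Note that with -mint8 there is no 64-bit integer type at all, which is why INT64_TYPE and UINT64_TYPE resolve to 0 in that configuration.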
+ +Enum +Name(avr_mcu) Type(int) +Known MCU names: + +EnumValue +Enum(avr_mcu) String(avr2) Value(0) + +EnumValue +Enum(avr_mcu) String(at90s2313) Value(1) + +EnumValue +Enum(avr_mcu) String(at90s2323) Value(2) + +EnumValue +Enum(avr_mcu) String(at90s2333) Value(3) + +EnumValue +Enum(avr_mcu) String(at90s2343) Value(4) + +EnumValue +Enum(avr_mcu) String(attiny22) Value(5) + +EnumValue +Enum(avr_mcu) String(attiny26) Value(6) + +EnumValue +Enum(avr_mcu) String(at90s4414) Value(7) + +EnumValue +Enum(avr_mcu) String(at90s4433) Value(8) + +EnumValue +Enum(avr_mcu) String(at90s4434) Value(9) + +EnumValue +Enum(avr_mcu) String(at90s8515) Value(10) + +EnumValue +Enum(avr_mcu) String(at90c8534) Value(11) + +EnumValue +Enum(avr_mcu) String(at90s8535) Value(12) + +EnumValue +Enum(avr_mcu) String(avr25) Value(13) + +EnumValue +Enum(avr_mcu) String(ata6289) Value(14) + +EnumValue +Enum(avr_mcu) String(ata5272) Value(15) + +EnumValue +Enum(avr_mcu) String(attiny13) Value(16) + +EnumValue +Enum(avr_mcu) String(attiny13a) Value(17) + +EnumValue +Enum(avr_mcu) String(attiny2313) Value(18) + +EnumValue +Enum(avr_mcu) String(attiny2313a) Value(19) + +EnumValue +Enum(avr_mcu) String(attiny24) Value(20) + +EnumValue +Enum(avr_mcu) String(attiny24a) Value(21) + +EnumValue +Enum(avr_mcu) String(attiny4313) Value(22) + +EnumValue +Enum(avr_mcu) String(attiny44) Value(23) + +EnumValue +Enum(avr_mcu) String(attiny44a) Value(24) + +EnumValue +Enum(avr_mcu) String(attiny84) Value(25) + +EnumValue +Enum(avr_mcu) String(attiny84a) Value(26) + +EnumValue +Enum(avr_mcu) String(attiny25) Value(27) + +EnumValue +Enum(avr_mcu) String(attiny45) Value(28) + +EnumValue +Enum(avr_mcu) String(attiny85) Value(29) + +EnumValue +Enum(avr_mcu) String(attiny261) Value(30) + +EnumValue +Enum(avr_mcu) String(attiny261a) Value(31) + +EnumValue +Enum(avr_mcu) String(attiny461) Value(32) + +EnumValue +Enum(avr_mcu) String(attiny461a) Value(33) + +EnumValue +Enum(avr_mcu) String(attiny861) Value(34) + +EnumValue +Enum(avr_mcu) String(attiny861a) Value(35) + +EnumValue +Enum(avr_mcu) String(attiny43u) Value(36) + +EnumValue +Enum(avr_mcu) String(attiny87) Value(37) + +EnumValue +Enum(avr_mcu) String(attiny48) Value(38) + +EnumValue +Enum(avr_mcu) String(attiny88) Value(39) + +EnumValue +Enum(avr_mcu) String(at86rf401) Value(40) + +EnumValue +Enum(avr_mcu) String(avr3) Value(41) + +EnumValue +Enum(avr_mcu) String(at43usb355) Value(42) + +EnumValue +Enum(avr_mcu) String(at76c711) Value(43) + +EnumValue +Enum(avr_mcu) String(avr31) Value(44) + +EnumValue +Enum(avr_mcu) String(atmega103) Value(45) + +EnumValue +Enum(avr_mcu) String(at43usb320) Value(46) + +EnumValue +Enum(avr_mcu) String(avr35) Value(47) + +EnumValue +Enum(avr_mcu) String(ata5505) Value(48) + +EnumValue +Enum(avr_mcu) String(at90usb82) Value(49) + +EnumValue +Enum(avr_mcu) String(at90usb162) Value(50) + +EnumValue +Enum(avr_mcu) String(atmega8u2) Value(51) + +EnumValue +Enum(avr_mcu) String(atmega16u2) Value(52) + +EnumValue +Enum(avr_mcu) String(atmega32u2) Value(53) + +EnumValue +Enum(avr_mcu) String(attiny167) Value(54) + +EnumValue +Enum(avr_mcu) String(attiny1634) Value(55) + +EnumValue +Enum(avr_mcu) String(avr4) Value(56) + +EnumValue +Enum(avr_mcu) String(ata6285) Value(57) + +EnumValue +Enum(avr_mcu) String(ata6286) Value(58) + +EnumValue +Enum(avr_mcu) String(atmega8) Value(59) + +EnumValue +Enum(avr_mcu) String(atmega8a) Value(60) + +EnumValue +Enum(avr_mcu) String(atmega48) Value(61) + +EnumValue +Enum(avr_mcu) String(atmega48a) Value(62) + +EnumValue 
+Enum(avr_mcu) String(atmega48p) Value(63) + +EnumValue +Enum(avr_mcu) String(atmega48pa) Value(64) + +EnumValue +Enum(avr_mcu) String(atmega88) Value(65) + +EnumValue +Enum(avr_mcu) String(atmega88a) Value(66) + +EnumValue +Enum(avr_mcu) String(atmega88p) Value(67) + +EnumValue +Enum(avr_mcu) String(atmega88pa) Value(68) + +EnumValue +Enum(avr_mcu) String(atmega8515) Value(69) + +EnumValue +Enum(avr_mcu) String(atmega8535) Value(70) + +EnumValue +Enum(avr_mcu) String(atmega8hva) Value(71) + +EnumValue +Enum(avr_mcu) String(at90pwm1) Value(72) + +EnumValue +Enum(avr_mcu) String(at90pwm2) Value(73) + +EnumValue +Enum(avr_mcu) String(at90pwm2b) Value(74) + +EnumValue +Enum(avr_mcu) String(at90pwm3) Value(75) + +EnumValue +Enum(avr_mcu) String(at90pwm3b) Value(76) + +EnumValue +Enum(avr_mcu) String(at90pwm81) Value(77) + +EnumValue +Enum(avr_mcu) String(avr5) Value(78) + +EnumValue +Enum(avr_mcu) String(ata5790) Value(79) + +EnumValue +Enum(avr_mcu) String(ata5790n) Value(80) + +EnumValue +Enum(avr_mcu) String(ata5795) Value(81) + +EnumValue +Enum(avr_mcu) String(atmega16) Value(82) + +EnumValue +Enum(avr_mcu) String(atmega16a) Value(83) + +EnumValue +Enum(avr_mcu) String(atmega161) Value(84) + +EnumValue +Enum(avr_mcu) String(atmega162) Value(85) + +EnumValue +Enum(avr_mcu) String(atmega163) Value(86) + +EnumValue +Enum(avr_mcu) String(atmega164a) Value(87) + +EnumValue +Enum(avr_mcu) String(atmega164p) Value(88) + +EnumValue +Enum(avr_mcu) String(atmega164pa) Value(89) + +EnumValue +Enum(avr_mcu) String(atmega165) Value(90) + +EnumValue +Enum(avr_mcu) String(atmega165a) Value(91) + +EnumValue +Enum(avr_mcu) String(atmega165p) Value(92) + +EnumValue +Enum(avr_mcu) String(atmega165pa) Value(93) + +EnumValue +Enum(avr_mcu) String(atmega168) Value(94) + +EnumValue +Enum(avr_mcu) String(atmega168a) Value(95) + +EnumValue +Enum(avr_mcu) String(atmega168p) Value(96) + +EnumValue +Enum(avr_mcu) String(atmega168pa) Value(97) + +EnumValue +Enum(avr_mcu) String(atmega169) Value(98) + +EnumValue +Enum(avr_mcu) String(atmega169a) Value(99) + +EnumValue +Enum(avr_mcu) String(atmega169p) Value(100) + +EnumValue +Enum(avr_mcu) String(atmega169pa) Value(101) + +EnumValue +Enum(avr_mcu) String(atmega16hvb) Value(102) + +EnumValue +Enum(avr_mcu) String(atmega16hvbrevb) Value(103) + +EnumValue +Enum(avr_mcu) String(atmega16m1) Value(104) + +EnumValue +Enum(avr_mcu) String(atmega16u4) Value(105) + +EnumValue +Enum(avr_mcu) String(atmega26hvg) Value(106) + +EnumValue +Enum(avr_mcu) String(atmega32a) Value(107) + +EnumValue +Enum(avr_mcu) String(atmega32) Value(108) + +EnumValue +Enum(avr_mcu) String(atmega323) Value(109) + +EnumValue +Enum(avr_mcu) String(atmega324a) Value(110) + +EnumValue +Enum(avr_mcu) String(atmega324p) Value(111) + +EnumValue +Enum(avr_mcu) String(atmega324pa) Value(112) + +EnumValue +Enum(avr_mcu) String(atmega325) Value(113) + +EnumValue +Enum(avr_mcu) String(atmega325a) Value(114) + +EnumValue +Enum(avr_mcu) String(atmega325p) Value(115) + +EnumValue +Enum(avr_mcu) String(atmega3250) Value(116) + +EnumValue +Enum(avr_mcu) String(atmega3250a) Value(117) + +EnumValue +Enum(avr_mcu) String(atmega3250p) Value(118) + +EnumValue +Enum(avr_mcu) String(atmega3250pa) Value(119) + +EnumValue +Enum(avr_mcu) String(atmega328) Value(120) + +EnumValue +Enum(avr_mcu) String(atmega328p) Value(121) + +EnumValue +Enum(avr_mcu) String(atmega329) Value(122) + +EnumValue +Enum(avr_mcu) String(atmega329a) Value(123) + +EnumValue +Enum(avr_mcu) String(atmega329p) Value(124) + +EnumValue +Enum(avr_mcu) 
String(atmega329pa) Value(125) + +EnumValue +Enum(avr_mcu) String(atmega3290) Value(126) + +EnumValue +Enum(avr_mcu) String(atmega3290a) Value(127) + +EnumValue +Enum(avr_mcu) String(atmega3290p) Value(128) + +EnumValue +Enum(avr_mcu) String(atmega3290pa) Value(129) + +EnumValue +Enum(avr_mcu) String(atmega32c1) Value(130) + +EnumValue +Enum(avr_mcu) String(atmega32m1) Value(131) + +EnumValue +Enum(avr_mcu) String(atmega32u4) Value(132) + +EnumValue +Enum(avr_mcu) String(atmega32u6) Value(133) + +EnumValue +Enum(avr_mcu) String(atmega406) Value(134) + +EnumValue +Enum(avr_mcu) String(atmega64) Value(135) + +EnumValue +Enum(avr_mcu) String(atmega64a) Value(136) + +EnumValue +Enum(avr_mcu) String(atmega640) Value(137) + +EnumValue +Enum(avr_mcu) String(atmega644) Value(138) + +EnumValue +Enum(avr_mcu) String(atmega644a) Value(139) + +EnumValue +Enum(avr_mcu) String(atmega644p) Value(140) + +EnumValue +Enum(avr_mcu) String(atmega644pa) Value(141) + +EnumValue +Enum(avr_mcu) String(atmega645) Value(142) + +EnumValue +Enum(avr_mcu) String(atmega645a) Value(143) + +EnumValue +Enum(avr_mcu) String(atmega645p) Value(144) + +EnumValue +Enum(avr_mcu) String(atmega6450) Value(145) + +EnumValue +Enum(avr_mcu) String(atmega6450a) Value(146) + +EnumValue +Enum(avr_mcu) String(atmega6450p) Value(147) + +EnumValue +Enum(avr_mcu) String(atmega649) Value(148) + +EnumValue +Enum(avr_mcu) String(atmega649a) Value(149) + +EnumValue +Enum(avr_mcu) String(atmega649p) Value(150) + +EnumValue +Enum(avr_mcu) String(atmega6490) Value(151) + +EnumValue +Enum(avr_mcu) String(atmega16hva) Value(152) + +EnumValue +Enum(avr_mcu) String(atmega16hva2) Value(153) + +EnumValue +Enum(avr_mcu) String(atmega32hvb) Value(154) + +EnumValue +Enum(avr_mcu) String(atmega6490a) Value(155) + +EnumValue +Enum(avr_mcu) String(atmega6490p) Value(156) + +EnumValue +Enum(avr_mcu) String(atmega64c1) Value(157) + +EnumValue +Enum(avr_mcu) String(atmega64m1) Value(158) + +EnumValue +Enum(avr_mcu) String(atmega64hve) Value(159) + +EnumValue +Enum(avr_mcu) String(atmega64rfa2) Value(160) + +EnumValue +Enum(avr_mcu) String(atmega64rfr2) Value(161) + +EnumValue +Enum(avr_mcu) String(atmega32hvbrevb) Value(162) + +EnumValue +Enum(avr_mcu) String(atmega48hvf) Value(163) + +EnumValue +Enum(avr_mcu) String(at90can32) Value(164) + +EnumValue +Enum(avr_mcu) String(at90can64) Value(165) + +EnumValue +Enum(avr_mcu) String(at90pwm161) Value(166) + +EnumValue +Enum(avr_mcu) String(at90pwm216) Value(167) + +EnumValue +Enum(avr_mcu) String(at90pwm316) Value(168) + +EnumValue +Enum(avr_mcu) String(at90scr100) Value(169) + +EnumValue +Enum(avr_mcu) String(at90usb646) Value(170) + +EnumValue +Enum(avr_mcu) String(at90usb647) Value(171) + +EnumValue +Enum(avr_mcu) String(at94k) Value(172) + +EnumValue +Enum(avr_mcu) String(m3000) Value(173) + +EnumValue +Enum(avr_mcu) String(avr51) Value(174) + +EnumValue +Enum(avr_mcu) String(atmega128) Value(175) + +EnumValue +Enum(avr_mcu) String(atmega128a) Value(176) + +EnumValue +Enum(avr_mcu) String(atmega1280) Value(177) + +EnumValue +Enum(avr_mcu) String(atmega1281) Value(178) + +EnumValue +Enum(avr_mcu) String(atmega1284) Value(179) + +EnumValue +Enum(avr_mcu) String(atmega1284p) Value(180) + +EnumValue +Enum(avr_mcu) String(atmega128rfa1) Value(181) + +EnumValue +Enum(avr_mcu) String(at90can128) Value(182) + +EnumValue +Enum(avr_mcu) String(at90usb1286) Value(183) + +EnumValue +Enum(avr_mcu) String(at90usb1287) Value(184) + +EnumValue +Enum(avr_mcu) String(avr6) Value(185) + +EnumValue +Enum(avr_mcu) 
String(atmega2560) Value(186) + +EnumValue +Enum(avr_mcu) String(atmega2561) Value(187) + +EnumValue +Enum(avr_mcu) String(avrxmega2) Value(188) + +EnumValue +Enum(avr_mcu) String(atxmega16a4) Value(189) + +EnumValue +Enum(avr_mcu) String(atxmega16d4) Value(190) + +EnumValue +Enum(avr_mcu) String(atxmega32a4) Value(191) + +EnumValue +Enum(avr_mcu) String(atxmega32d4) Value(192) + +EnumValue +Enum(avr_mcu) String(atxmega32x1) Value(193) + +EnumValue +Enum(avr_mcu) String(atmxt112sl) Value(194) + +EnumValue +Enum(avr_mcu) String(atmxt224) Value(195) + +EnumValue +Enum(avr_mcu) String(atmxt224e) Value(196) + +EnumValue +Enum(avr_mcu) String(atmxt336s) Value(197) + +EnumValue +Enum(avr_mcu) String(atxmega16a4u) Value(198) + +EnumValue +Enum(avr_mcu) String(atxmega16c4) Value(199) + +EnumValue +Enum(avr_mcu) String(atxmega32a4u) Value(200) + +EnumValue +Enum(avr_mcu) String(atxmega32c4) Value(201) + +EnumValue +Enum(avr_mcu) String(atxmega32e5) Value(202) + +EnumValue +Enum(avr_mcu) String(avrxmega4) Value(203) + +EnumValue +Enum(avr_mcu) String(atxmega64a3) Value(204) + +EnumValue +Enum(avr_mcu) String(atxmega64d3) Value(205) + +EnumValue +Enum(avr_mcu) String(atxmega64a3u) Value(206) + +EnumValue +Enum(avr_mcu) String(atxmega64a4u) Value(207) + +EnumValue +Enum(avr_mcu) String(atxmega64b1) Value(208) + +EnumValue +Enum(avr_mcu) String(atxmega64b3) Value(209) + +EnumValue +Enum(avr_mcu) String(atxmega64c3) Value(210) + +EnumValue +Enum(avr_mcu) String(atxmega64d4) Value(211) + +EnumValue +Enum(avr_mcu) String(avrxmega5) Value(212) + +EnumValue +Enum(avr_mcu) String(atxmega64a1) Value(213) + +EnumValue +Enum(avr_mcu) String(atxmega64a1u) Value(214) + +EnumValue +Enum(avr_mcu) String(avrxmega6) Value(215) + +EnumValue +Enum(avr_mcu) String(atxmega128a3) Value(216) + +EnumValue +Enum(avr_mcu) String(atxmega128d3) Value(217) + +EnumValue +Enum(avr_mcu) String(atxmega192a3) Value(218) + +EnumValue +Enum(avr_mcu) String(atxmega192d3) Value(219) + +EnumValue +Enum(avr_mcu) String(atxmega256a3) Value(220) + +EnumValue +Enum(avr_mcu) String(atxmega256a3b) Value(221) + +EnumValue +Enum(avr_mcu) String(atxmega256a3bu) Value(222) + +EnumValue +Enum(avr_mcu) String(atxmega256d3) Value(223) + +EnumValue +Enum(avr_mcu) String(atxmega128a3u) Value(224) + +EnumValue +Enum(avr_mcu) String(atxmega128b1) Value(225) + +EnumValue +Enum(avr_mcu) String(atxmega128b3) Value(226) + +EnumValue +Enum(avr_mcu) String(atxmega128c3) Value(227) + +EnumValue +Enum(avr_mcu) String(atxmega128d4) Value(228) + +EnumValue +Enum(avr_mcu) String(atmxt540s) Value(229) + +EnumValue +Enum(avr_mcu) String(atmxt540sreva) Value(230) + +EnumValue +Enum(avr_mcu) String(atxmega192a3u) Value(231) + +EnumValue +Enum(avr_mcu) String(atxmega192c3) Value(232) + +EnumValue +Enum(avr_mcu) String(atxmega256a3u) Value(233) + +EnumValue +Enum(avr_mcu) String(atxmega256c3) Value(234) + +EnumValue +Enum(avr_mcu) String(atxmega384c3) Value(235) + +EnumValue +Enum(avr_mcu) String(atxmega384d3) Value(236) + +EnumValue +Enum(avr_mcu) String(avrxmega7) Value(237) + +EnumValue +Enum(avr_mcu) String(atxmega128a1) Value(238) + +EnumValue +Enum(avr_mcu) String(atxmega128a1u) Value(239) + +EnumValue +Enum(avr_mcu) String(atxmega128a4u) Value(240) + +EnumValue +Enum(avr_mcu) String(avr1) Value(241) + +EnumValue +Enum(avr_mcu) String(at90s1200) Value(242) + +EnumValue +Enum(avr_mcu) String(attiny11) Value(243) + +EnumValue +Enum(avr_mcu) String(attiny12) Value(244) + +EnumValue +Enum(avr_mcu) String(attiny15) Value(245) + +EnumValue +Enum(avr_mcu) String(attiny28) 
Value(246) + diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c new file mode 100644 index 000000000..8ca7de0b3 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.c @@ -0,0 +1,12522 @@ +/* Subroutines for insn-output.c for ATMEL AVR micro controllers + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "flags.h" +#include "reload.h" +#include "tree.h" +#include "print-tree.h" +#include "calls.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "output.h" +#include "expr.h" +#include "c-family/c-common.h" +#include "diagnostic-core.h" +#include "obstack.h" +#include "function.h" +#include "recog.h" +#include "optabs.h" +#include "ggc.h" +#include "langhooks.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "params.h" +#include "df.h" + +/* Maximal allowed offset for an address in the LD command */ +#define MAX_LD_OFFSET(MODE) (64 - (signed)GET_MODE_SIZE (MODE)) + +/* Return true if STR starts with PREFIX and false, otherwise. */ +#define STR_PREFIX_P(STR,PREFIX) (0 == strncmp (STR, PREFIX, strlen (PREFIX))) + +/* The 4 bits starting at SECTION_MACH_DEP are reserved to store the + address space where data is to be located. + As the only non-generic address spaces are all located in flash, + this can be used to test if data shall go into some .progmem* section. + This must be the rightmost field of machine dependent section flags. */ +#define AVR_SECTION_PROGMEM (0xf * SECTION_MACH_DEP) + +/* Similar 4-bit region for SYMBOL_REF_FLAGS. */ +#define AVR_SYMBOL_FLAG_PROGMEM (0xf * SYMBOL_FLAG_MACH_DEP) + +/* Similar 4-bit region in SYMBOL_REF_FLAGS: + Set address-space AS in SYMBOL_REF_FLAGS of SYM */ +#define AVR_SYMBOL_SET_ADDR_SPACE(SYM,AS) \ + do { \ + SYMBOL_REF_FLAGS (sym) &= ~AVR_SYMBOL_FLAG_PROGMEM; \ + SYMBOL_REF_FLAGS (sym) |= (AS) * SYMBOL_FLAG_MACH_DEP; \ + } while (0) + +/* Read address-space from SYMBOL_REF_FLAGS of SYM */ +#define AVR_SYMBOL_GET_ADDR_SPACE(SYM) \ + ((SYMBOL_REF_FLAGS (sym) & AVR_SYMBOL_FLAG_PROGMEM) \ + / SYMBOL_FLAG_MACH_DEP) + +/* Known address spaces. The order must be the same as in the respective + enum from avr.h (or designated initialized must be used). 
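The AVR_SYMBOL_FLAG_PROGMEM / AVR_SYMBOL_SET_ADDR_SPACE / AVR_SYMBOL_GET_ADDR_SPACE macros in avr.c above store a 4-bit address-space number in the machine-dependent part of SYMBOL_REF_FLAGS by multiplying and dividing with the lowest machine-dependent flag bit. A self-contained sketch of the same idiom follows; the bit position and all names are invented for the example.

  #include <assert.h>
  #include <stdio.h>

  /* Hypothetical stand-ins: FLAG_MACH_DEP is the lowest target-specific bit,
     so multiplying by it is a left shift and dividing by it a right shift. */
  #define FLAG_MACH_DEP  (1u << 8)
  #define FIELD_MASK     (0xfu * FLAG_MACH_DEP)

  static unsigned int
  set_field (unsigned int flags, unsigned int as)
  {
    flags &= ~FIELD_MASK;            /* clear the 4-bit field */
    flags |= as * FLAG_MACH_DEP;     /* store AS at the field's position */
    return flags;
  }

  static unsigned int
  get_field (unsigned int flags)
  {
    return (flags & FIELD_MASK) / FLAG_MACH_DEP;
  }

  int
  main (void)
  {
    unsigned int flags = 0x5;        /* some unrelated low bits */

    flags = set_field (flags, 7);    /* e.g. address-space number 7 */
    assert (get_field (flags) == 7);
    assert ((flags & 0xff) == 0x5);  /* the low bits are untouched */
    printf ("flags = 0x%x\n", flags);
    return 0;
  }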
*/ +const avr_addrspace_t avr_addrspace[ADDR_SPACE_COUNT] = +{ + { ADDR_SPACE_RAM, 0, 2, "", 0, NULL }, + { ADDR_SPACE_FLASH, 1, 2, "__flash", 0, ".progmem.data" }, + { ADDR_SPACE_FLASH1, 1, 2, "__flash1", 1, ".progmem1.data" }, + { ADDR_SPACE_FLASH2, 1, 2, "__flash2", 2, ".progmem2.data" }, + { ADDR_SPACE_FLASH3, 1, 2, "__flash3", 3, ".progmem3.data" }, + { ADDR_SPACE_FLASH4, 1, 2, "__flash4", 4, ".progmem4.data" }, + { ADDR_SPACE_FLASH5, 1, 2, "__flash5", 5, ".progmem5.data" }, + { ADDR_SPACE_MEMX, 1, 3, "__memx", 0, ".progmemx.data" }, +}; + + +/* Holding RAM addresses of some SFRs used by the compiler and that + are unique over all devices in an architecture like 'avr4'. */ + +typedef struct +{ + /* SREG: The processor status */ + int sreg; + + /* RAMPX, RAMPY, RAMPD and CCP of XMEGA */ + int ccp; + int rampd; + int rampx; + int rampy; + + /* RAMPZ: The high byte of 24-bit address used with ELPM */ + int rampz; + + /* SP: The stack pointer and its low and high byte */ + int sp_l; + int sp_h; +} avr_addr_t; + +static avr_addr_t avr_addr; + + +/* Prototypes for local helper functions. */ + +static const char* out_movqi_r_mr (rtx, rtx[], int*); +static const char* out_movhi_r_mr (rtx, rtx[], int*); +static const char* out_movsi_r_mr (rtx, rtx[], int*); +static const char* out_movqi_mr_r (rtx, rtx[], int*); +static const char* out_movhi_mr_r (rtx, rtx[], int*); +static const char* out_movsi_mr_r (rtx, rtx[], int*); + +static int get_sequence_length (rtx insns); +static int sequent_regs_live (void); +static const char *ptrreg_to_str (int); +static const char *cond_string (enum rtx_code); +static int avr_num_arg_regs (enum machine_mode, const_tree); +static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code, + int, bool); +static void output_reload_in_const (rtx*, rtx, int*, bool); +static struct machine_function * avr_init_machine_status (void); + + +/* Prototypes for hook implementors if needed before their implementation. */ + +static bool avr_rtx_costs (rtx, int, int, int, int*, bool); + + +/* Allocate registers from r25 to r8 for parameters for function calls. */ +#define FIRST_CUM_REG 26 + +/* Implicit target register of LPM instruction (R0) */ +extern GTY(()) rtx lpm_reg_rtx; +rtx lpm_reg_rtx; + +/* (Implicit) address register of LPM instruction (R31:R30 = Z) */ +extern GTY(()) rtx lpm_addr_reg_rtx; +rtx lpm_addr_reg_rtx; + +/* Temporary register RTX (reg:QI TMP_REGNO) */ +extern GTY(()) rtx tmp_reg_rtx; +rtx tmp_reg_rtx; + +/* Zeroed register RTX (reg:QI ZERO_REGNO) */ +extern GTY(()) rtx zero_reg_rtx; +rtx zero_reg_rtx; + +/* RTXs for all general purpose registers as QImode */ +extern GTY(()) rtx all_regs_rtx[32]; +rtx all_regs_rtx[32]; + +/* SREG, the processor status */ +extern GTY(()) rtx sreg_rtx; +rtx sreg_rtx; + +/* RAMP* special function registers */ +extern GTY(()) rtx rampd_rtx; +extern GTY(()) rtx rampx_rtx; +extern GTY(()) rtx rampy_rtx; +extern GTY(()) rtx rampz_rtx; +rtx rampd_rtx; +rtx rampx_rtx; +rtx rampy_rtx; +rtx rampz_rtx; + +/* RTX containing the strings "" and "e", respectively */ +static GTY(()) rtx xstring_empty; +static GTY(()) rtx xstring_e; + +/* Current architecture. */ +const avr_arch_t *avr_current_arch; + +/* Current device. */ +const avr_mcu_t *avr_current_device; + +/* Section to put switch tables in. */ +static GTY(()) section *progmem_swtable_section; + +/* Unnamed sections associated to __attribute__((progmem)) aka. PROGMEM + or to address space __flash* or __memx. 
Only used as singletons inside + avr_asm_select_section, but it must not be local there because of GTY. */ +static GTY(()) section *progmem_section[ADDR_SPACE_COUNT]; + +/* Condition for insns/expanders from avr-dimode.md. */ +bool avr_have_dimode = true; + +/* To track if code will use .bss and/or .data. */ +bool avr_need_clear_bss_p = false; +bool avr_need_copy_data_p = false; + + +/* Transform UP into lowercase and write the result to LO. + You must provide enough space for LO. Return LO. */ + +static char* +avr_tolower (char *lo, const char *up) +{ + char *lo0 = lo; + + for (; *up; up++, lo++) + *lo = TOLOWER (*up); + + *lo = '\0'; + + return lo0; +} + + +/* Custom function to count number of set bits. */ + +static inline int +avr_popcount (unsigned int val) +{ + int pop = 0; + + while (val) + { + val &= val-1; + pop++; + } + + return pop; +} + + +/* Constraint helper function. XVAL is a CONST_INT or a CONST_DOUBLE. + Return true if the least significant N_BYTES bytes of XVAL all have a + popcount in POP_MASK and false, otherwise. POP_MASK represents a subset + of integers which contains an integer N iff bit N of POP_MASK is set. */ + +bool +avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask) +{ + int i; + + enum machine_mode mode = GET_MODE (xval); + + if (VOIDmode == mode) + mode = SImode; + + for (i = 0; i < n_bytes; i++) + { + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + if (0 == (pop_mask & (1 << avr_popcount (val8)))) + return false; + } + + return true; +} + + +/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get + the bit representation of X by "casting" it to CONST_INT. */ + +rtx +avr_to_int_mode (rtx x) +{ + enum machine_mode mode = GET_MODE (x); + + return VOIDmode == mode + ? x + : simplify_gen_subreg (int_mode_for_mode (mode), x, mode, 0); +} + + +/* Implement `TARGET_OPTION_OVERRIDE'. */ + +static void +avr_option_override (void) +{ + flag_delete_null_pointer_checks = 0; + + /* caller-save.c looks for call-clobbered hard registers that are assigned + to pseudos that cross calls and tries so save-restore them around calls + in order to reduce the number of stack slots needed. + + This might lead to situations where reload is no more able to cope + with the challenge of AVR's very few address registers and fails to + perform the requested spills. */ + + if (avr_strict_X) + flag_caller_saves = 0; + + /* Unwind tables currently require a frame pointer for correctness, + see toplev.c:process_options(). */ + + if ((flag_unwind_tables + || flag_non_call_exceptions + || flag_asynchronous_unwind_tables) + && !ACCUMULATE_OUTGOING_ARGS) + { + flag_omit_frame_pointer = 0; + } + + if (flag_pic == 1) + warning (OPT_fpic, "-fpic is not supported"); + if (flag_pic == 2) + warning (OPT_fPIC, "-fPIC is not supported"); + if (flag_pie == 1) + warning (OPT_fpie, "-fpie is not supported"); + if (flag_pie == 2) + warning (OPT_fPIE, "-fPIE is not supported"); + + avr_current_device = &avr_mcu_types[avr_mcu_index]; + avr_current_arch = &avr_arch_types[avr_current_device->arch]; + + /* RAM addresses of some SFRs common to all devices in respective arch. 
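The constraint helper avr_popcount_each_byte above encodes its set of allowed per-byte popcounts in a bit mask: bit k of pop_mask set means a byte with exactly k one-bits is acceptable, so a mask with bits 0 and 8 set accepts only 0x00 and 0xff bytes. A plain-C sketch of the same test on an ordinary integer; the names here are illustrative, not the compiler's.

  #include <stdbool.h>
  #include <stdio.h>

  static int
  popcount8 (unsigned long v)
  {
    int n = 0;

    for (v &= 0xff; v; v &= v - 1)
      n++;

    return n;
  }

  /* True iff each of the N_BYTES low bytes of VAL has a popcount that is
     a member of the set encoded by POP_MASK (bit k set <=> k allowed). */
  static bool
  popcount_each_byte (unsigned long val, int n_bytes, int pop_mask)
  {
    int i;

    for (i = 0; i < n_bytes; i++)
      if (0 == (pop_mask & (1 << popcount8 (val >> (8 * i)))))
        return false;

    return true;
  }

  int
  main (void)
  {
    int mask_0_or_8 = (1 << 0) | (1 << 8);   /* bytes must be 0x00 or 0xff */

    printf ("%d\n", (int) popcount_each_byte (0x00ff00fful, 4, mask_0_or_8)); /* 1 */
    printf ("%d\n", (int) popcount_each_byte (0x00fe00fful, 4, mask_0_or_8)); /* 0 */
    return 0;
  }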
*/ + + /* SREG: Status Register containing flags like I (global IRQ) */ + avr_addr.sreg = 0x3F + avr_current_arch->sfr_offset; + + /* RAMPZ: Address' high part when loading via ELPM */ + avr_addr.rampz = 0x3B + avr_current_arch->sfr_offset; + + avr_addr.rampy = 0x3A + avr_current_arch->sfr_offset; + avr_addr.rampx = 0x39 + avr_current_arch->sfr_offset; + avr_addr.rampd = 0x38 + avr_current_arch->sfr_offset; + avr_addr.ccp = 0x34 + avr_current_arch->sfr_offset; + + /* SP: Stack Pointer (SP_H:SP_L) */ + avr_addr.sp_l = 0x3D + avr_current_arch->sfr_offset; + avr_addr.sp_h = avr_addr.sp_l + 1; + + init_machine_status = avr_init_machine_status; + + avr_log_set_avr_log(); +} + +/* Function to set up the backend function structure. */ + +static struct machine_function * +avr_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + + +/* Implement `INIT_EXPANDERS'. */ +/* The function works like a singleton. */ + +void +avr_init_expanders (void) +{ + int regno; + + for (regno = 0; regno < 32; regno ++) + all_regs_rtx[regno] = gen_rtx_REG (QImode, regno); + + lpm_reg_rtx = all_regs_rtx[LPM_REGNO]; + tmp_reg_rtx = all_regs_rtx[TMP_REGNO]; + zero_reg_rtx = all_regs_rtx[ZERO_REGNO]; + + lpm_addr_reg_rtx = gen_rtx_REG (HImode, REG_Z); + + sreg_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.sreg)); + rampd_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampd)); + rampx_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampx)); + rampy_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampy)); + rampz_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampz)); + + xstring_empty = gen_rtx_CONST_STRING (VOIDmode, ""); + xstring_e = gen_rtx_CONST_STRING (VOIDmode, "e"); +} + + +/* Implement `REGNO_REG_CLASS'. */ +/* Return register class for register R. */ + +enum reg_class +avr_regno_reg_class (int r) +{ + static const enum reg_class reg_class_tab[] = + { + R0_REG, + /* r1 - r15 */ + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + /* r16 - r23 */ + SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, + SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, + /* r24, r25 */ + ADDW_REGS, ADDW_REGS, + /* X: r26, 27 */ + POINTER_X_REGS, POINTER_X_REGS, + /* Y: r28, r29 */ + POINTER_Y_REGS, POINTER_Y_REGS, + /* Z: r30, r31 */ + POINTER_Z_REGS, POINTER_Z_REGS, + /* SP: SPL, SPH */ + STACK_REG, STACK_REG + }; + + if (r <= 33) + return reg_class_tab[r]; + + return ALL_REGS; +} + + +/* Implement `TARGET_SCALAR_MODE_SUPPORTED_P'. */ + +static bool +avr_scalar_mode_supported_p (enum machine_mode mode) +{ + if (ALL_FIXED_POINT_MODE_P (mode)) + return true; + + if (PSImode == mode) + return true; + + return default_scalar_mode_supported_p (mode); +} + + +/* Return TRUE if DECL is a VAR_DECL located in flash and FALSE, otherwise. */ + +static bool +avr_decl_flash_p (tree decl) +{ + if (TREE_CODE (decl) != VAR_DECL + || TREE_TYPE (decl) == error_mark_node) + { + return false; + } + + return !ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))); +} + + +/* Return TRUE if DECL is a VAR_DECL located in the 24-bit flash + address space and FALSE, otherwise. */ + +static bool +avr_decl_memx_p (tree decl) +{ + if (TREE_CODE (decl) != VAR_DECL + || TREE_TYPE (decl) == error_mark_node) + { + return false; + } + + return (ADDR_SPACE_MEMX == TYPE_ADDR_SPACE (TREE_TYPE (decl))); +} + + +/* Return TRUE if X is a MEM rtx located in flash and FALSE, otherwise. 
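avr_decl_flash_p and avr_decl_memx_p above (together with avr_mem_flash_p / avr_mem_memx_p that follow) classify user variables placed in the named address spaces listed in the avr_addrspace[] table. A user-level illustration, compilable only with avr-gcc and with contents invented for the example:

  /* Illustration only; __flash and __memx are the AVR named address spaces. */
  const __flash char table[] = { 1, 2, 3, 4 };        /* placed in .progmem.data */
  const __memx char far_msg[] = "anywhere in flash";  /* 24-bit __memx space     */

  char
  read_both (unsigned char i)
  {
    /* Accesses are turned into program-memory loads (LPM, or ELPM/libgcc
       helpers for __memx) rather than ordinary RAM loads. */
    return table[i] + far_msg[i];
  }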
*/ + +bool +avr_mem_flash_p (rtx x) +{ + return (MEM_P (x) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))); +} + + +/* Return TRUE if X is a MEM rtx located in the 24-bit flash + address space and FALSE, otherwise. */ + +bool +avr_mem_memx_p (rtx x) +{ + return (MEM_P (x) + && ADDR_SPACE_MEMX == MEM_ADDR_SPACE (x)); +} + + +/* A helper for the subsequent function attribute used to dig for + attribute 'name' in a FUNCTION_DECL or FUNCTION_TYPE */ + +static inline int +avr_lookup_function_attribute1 (const_tree func, const char *name) +{ + if (FUNCTION_DECL == TREE_CODE (func)) + { + if (NULL_TREE != lookup_attribute (name, DECL_ATTRIBUTES (func))) + { + return true; + } + + func = TREE_TYPE (func); + } + + gcc_assert (TREE_CODE (func) == FUNCTION_TYPE + || TREE_CODE (func) == METHOD_TYPE); + + return NULL_TREE != lookup_attribute (name, TYPE_ATTRIBUTES (func)); +} + +/* Return nonzero if FUNC is a naked function. */ + +static int +avr_naked_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "naked"); +} + +/* Return nonzero if FUNC is an interrupt function as specified + by the "interrupt" attribute. */ + +static int +avr_interrupt_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "interrupt"); +} + +/* Return nonzero if FUNC is a signal function as specified + by the "signal" attribute. */ + +static int +avr_signal_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "signal"); +} + +/* Return nonzero if FUNC is an OS_task function. */ + +static int +avr_OS_task_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "OS_task"); +} + +/* Return nonzero if FUNC is an OS_main function. */ + +static int +avr_OS_main_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "OS_main"); +} + + +/* Implement `TARGET_SET_CURRENT_FUNCTION'. */ +/* Sanity cheching for above function attributes. */ + +static void +avr_set_current_function (tree decl) +{ + location_t loc; + const char *isr; + + if (decl == NULL_TREE + || current_function_decl == NULL_TREE + || current_function_decl == error_mark_node + || ! cfun->machine + || cfun->machine->attributes_checked_p) + return; + + loc = DECL_SOURCE_LOCATION (decl); + + cfun->machine->is_naked = avr_naked_function_p (decl); + cfun->machine->is_signal = avr_signal_function_p (decl); + cfun->machine->is_interrupt = avr_interrupt_function_p (decl); + cfun->machine->is_OS_task = avr_OS_task_function_p (decl); + cfun->machine->is_OS_main = avr_OS_main_function_p (decl); + + isr = cfun->machine->is_interrupt ? "interrupt" : "signal"; + + /* Too much attributes make no sense as they request conflicting features. */ + + if (cfun->machine->is_OS_task + cfun->machine->is_OS_main + + (cfun->machine->is_signal || cfun->machine->is_interrupt) > 1) + error_at (loc, "function attributes %qs, %qs and %qs are mutually" + " exclusive", "OS_task", "OS_main", isr); + + /* 'naked' will hide effects of 'OS_task' and 'OS_main'. */ + + if (cfun->machine->is_naked + && (cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + warning_at (loc, OPT_Wattributes, "function attributes %qs and %qs have" + " no effect on %qs function", "OS_task", "OS_main", "naked"); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + tree args = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree ret = TREE_TYPE (TREE_TYPE (decl)); + const char *name; + + name = DECL_ASSEMBLER_NAME_SET_P (decl) + ? 
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) + : IDENTIFIER_POINTER (DECL_NAME (decl)); + + /* Skip a leading '*' that might still prefix the assembler name, + e.g. in non-LTO runs. */ + + name = default_strip_name_encoding (name); + + /* Silently ignore 'signal' if 'interrupt' is present. AVR-LibC startet + using this when it switched from SIGNAL and INTERRUPT to ISR. */ + + if (cfun->machine->is_interrupt) + cfun->machine->is_signal = 0; + + /* Interrupt handlers must be void __vector (void) functions. */ + + if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE) + error_at (loc, "%qs function cannot have arguments", isr); + + if (TREE_CODE (ret) != VOID_TYPE) + error_at (loc, "%qs function cannot return a value", isr); + + /* If the function has the 'signal' or 'interrupt' attribute, ensure + that the name of the function is "__vector_NN" so as to catch + when the user misspells the vector name. */ + + if (!STR_PREFIX_P (name, "__vector")) + warning_at (loc, 0, "%qs appears to be a misspelled %s handler", + name, isr); + } + + /* Don't print the above diagnostics more than once. */ + + cfun->machine->attributes_checked_p = 1; +} + + +/* Implement `ACCUMULATE_OUTGOING_ARGS'. */ + +int +avr_accumulate_outgoing_args (void) +{ + if (!cfun) + return TARGET_ACCUMULATE_OUTGOING_ARGS; + + /* FIXME: For setjmp and in avr_builtin_setjmp_frame_value we don't know + what offset is correct. In some cases it is relative to + virtual_outgoing_args_rtx and in others it is relative to + virtual_stack_vars_rtx. For example code see + gcc.c-torture/execute/built-in-setjmp.c + gcc.c-torture/execute/builtins/sprintf-chk.c */ + + return (TARGET_ACCUMULATE_OUTGOING_ARGS + && !(cfun->calls_setjmp + || cfun->has_nonlocal_label)); +} + + +/* Report contribution of accumulated outgoing arguments to stack size. */ + +static inline int +avr_outgoing_args_size (void) +{ + return ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0; +} + + +/* Implement `STARTING_FRAME_OFFSET'. */ +/* This is the offset from the frame pointer register to the first stack slot + that contains a variable living in the frame. */ + +int +avr_starting_frame_offset (void) +{ + return 1 + avr_outgoing_args_size (); +} + + +/* Return the number of hard registers to push/pop in the prologue/epilogue + of the current function, and optionally store these registers in SET. */ + +static int +avr_regs_to_save (HARD_REG_SET *set) +{ + int reg, count; + int int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + + if (set) + CLEAR_HARD_REG_SET (*set); + count = 0; + + /* No need to save any registers if the function never returns or + has the "OS_task" or "OS_main" attribute. */ + + if (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main) + return 0; + + for (reg = 0; reg < 32; reg++) + { + /* Do not push/pop __tmp_reg__, __zero_reg__, as well as + any global register variables. */ + + if (fixed_regs[reg]) + continue; + + if ((int_or_sig_p && !crtl->is_leaf && call_used_regs[reg]) + || (df_regs_ever_live_p (reg) + && (int_or_sig_p || !call_used_regs[reg]) + /* Don't record frame pointer registers here. They are treated + indivitually in prologue. 
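avr_set_current_function above enforces the shape of "interrupt"/"signal" handlers: no arguments, void return, and a name of the form __vector_NN, which is what avr-libc's ISR() macro produces; anything else triggers the misspelled-handler warning. A hand-written equivalent for illustration, with the vector number invented:

  /* Roughly what avr-libc's ISR(SOME_vect) expands to; vector number 16
     is made up for this example. */
  void __vector_16 (void) __attribute__ ((signal, used, externally_visible));

  void
  __vector_16 (void)
  {
    /* Handler body; the "signal" attribute makes the compiler save SREG,
       __tmp_reg__ and __zero_reg__ and return with RETI. */
  }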
*/ + && !(frame_pointer_needed + && (reg == REG_Y || reg == (REG_Y+1))))) + { + if (set) + SET_HARD_REG_BIT (*set, reg); + count++; + } + } + return count; +} + + +/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */ + +static bool +avr_allocate_stack_slots_for_args (void) +{ + return !cfun->machine->is_naked; +} + + +/* Return true if register FROM can be eliminated via register TO. */ + +static bool +avr_can_eliminate (const int from, const int to) +{ + return ((frame_pointer_needed && to == FRAME_POINTER_REGNUM) + || !frame_pointer_needed); +} + + +/* Implement `TARGET_WARN_FUNC_RETURN'. */ + +static bool +avr_warn_func_return (tree decl) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + + return !avr_naked_function_p (decl); +} + +/* Compute offset between arg_pointer and frame_pointer. */ + +int +avr_initial_elimination_offset (int from, int to) +{ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + else + { + int offset = frame_pointer_needed ? 2 : 0; + int avr_pc_size = AVR_HAVE_EIJMP_EICALL ? 3 : 2; + + offset += avr_regs_to_save (NULL); + return (get_frame_size () + avr_outgoing_args_size() + + avr_pc_size + 1 + offset); + } +} + + +/* Helper for the function below. */ + +static void +avr_adjust_type_node (tree *node, enum machine_mode mode, int sat_p) +{ + *node = make_node (FIXED_POINT_TYPE); + TYPE_SATURATING (*node) = sat_p; + TYPE_UNSIGNED (*node) = UNSIGNED_FIXED_POINT_MODE_P (mode); + TYPE_IBIT (*node) = GET_MODE_IBIT (mode); + TYPE_FBIT (*node) = GET_MODE_FBIT (mode); + TYPE_PRECISION (*node) = GET_MODE_BITSIZE (mode); + TYPE_ALIGN (*node) = 8; + SET_TYPE_MODE (*node, mode); + + layout_type (*node); +} + + +/* Implement `TARGET_BUILD_BUILTIN_VA_LIST'. */ + +static tree +avr_build_builtin_va_list (void) +{ + /* avr-modes.def adjusts [U]TA to be 64-bit modes with 48 fractional bits. + This is more appropriate for the 8-bit machine AVR than 128-bit modes. + The ADJUST_IBIT/FBIT are handled in toplev:init_adjust_machine_modes() + which is auto-generated by genmodes, but the compiler assigns [U]DAmode + to the long long accum modes instead of the desired [U]TAmode. + + Fix this now, right after node setup in tree.c:build_common_tree_nodes(). + This must run before c-cppbuiltin.c:builtin_define_fixed_point_constants() + which built-in defines macros like __ULLACCUM_FBIT__ that are used by + libgcc to detect IBIT and FBIT. */ + + avr_adjust_type_node (&ta_type_node, TAmode, 0); + avr_adjust_type_node (&uta_type_node, UTAmode, 0); + avr_adjust_type_node (&sat_ta_type_node, TAmode, 1); + avr_adjust_type_node (&sat_uta_type_node, UTAmode, 1); + + unsigned_long_long_accum_type_node = uta_type_node; + long_long_accum_type_node = ta_type_node; + sat_unsigned_long_long_accum_type_node = sat_uta_type_node; + sat_long_long_accum_type_node = sat_ta_type_node; + + /* Dispatch to the default handler. */ + + return std_build_builtin_va_list (); +} + + +/* Implement `TARGET_BUILTIN_SETJMP_FRAME_VALUE'. */ +/* Actual start of frame is virtual_stack_vars_rtx this is offset from + frame pointer by +STARTING_FRAME_OFFSET. + Using saved frame = virtual_stack_vars_rtx - STARTING_FRAME_OFFSET + avoids creating add/sub of offset in nonlocal goto and setjmp. 
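For the arg-pointer elimination case, avr_initial_elimination_offset above sums the frame, any accumulated outgoing arguments, the return address (2 or 3 bytes), the saved frame pointer and the other pushed registers, plus the same one-byte skew that appears in avr_starting_frame_offset. A worked sketch with invented numbers:

  #include <stdio.h>

  /* Mirrors the non-trivial branch of avr_initial_elimination_offset above;
     every input below is invented for the example. */
  static int
  elimination_offset (int frame_size, int outgoing_args, int saved_regs,
                      int fp_needed, int pc_size)
  {
    int offset = (fp_needed ? 2 : 0) + saved_regs;

    return frame_size + outgoing_args + pc_size + 1 + offset;
  }

  int
  main (void)
  {
    /* 10-byte frame, no accumulated outgoing args, 3 pushed registers,
       frame pointer in use, 2-byte return address: 10 + 0 + 2 + 1 + 5 = 18. */
    printf ("%d\n", elimination_offset (10, 0, 3, 1, 2));
    return 0;
  }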
*/ + +static rtx +avr_builtin_setjmp_frame_value (void) +{ + rtx xval = gen_reg_rtx (Pmode); + emit_insn (gen_subhi3 (xval, virtual_stack_vars_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, Pmode))); + return xval; +} + + +/* Return contents of MEM at frame pointer + stack size + 1 (+2 if 3-byte PC). + This is return address of function. */ + +rtx +avr_return_addr_rtx (int count, rtx tem) +{ + rtx r; + + /* Can only return this function's return address. Others not supported. */ + if (count) + return NULL; + + if (AVR_3_BYTE_PC) + { + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+2"); + warning (0, "% contains only 2 bytes" + " of address"); + } + else + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+1"); + + r = gen_rtx_PLUS (Pmode, tem, r); + r = gen_frame_mem (Pmode, memory_address (Pmode, r)); + r = gen_rtx_ROTATE (HImode, r, GEN_INT (8)); + return r; +} + +/* Return 1 if the function epilogue is just a single "ret". */ + +int +avr_simple_epilogue (void) +{ + return (! frame_pointer_needed + && get_frame_size () == 0 + && avr_outgoing_args_size() == 0 + && avr_regs_to_save (NULL) == 0 + && ! cfun->machine->is_interrupt + && ! cfun->machine->is_signal + && ! cfun->machine->is_naked + && ! TREE_THIS_VOLATILE (current_function_decl)); +} + +/* This function checks sequence of live registers. */ + +static int +sequent_regs_live (void) +{ + int reg; + int live_seq = 0; + int cur_seq = 0; + + for (reg = 0; reg < 18; ++reg) + { + if (fixed_regs[reg]) + { + /* Don't recognize sequences that contain global register + variables. */ + + if (live_seq != 0) + return 0; + else + continue; + } + + if (!call_used_regs[reg]) + { + if (df_regs_ever_live_p (reg)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + } + + if (!frame_pointer_needed) + { + if (df_regs_ever_live_p (REG_Y)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + + if (df_regs_ever_live_p (REG_Y+1)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + else + { + cur_seq += 2; + live_seq += 2; + } + return (cur_seq == live_seq) ? live_seq : 0; +} + +/* Obtain the length sequence of insns. */ + +int +get_sequence_length (rtx insns) +{ + rtx insn; + int length; + + for (insn = insns, length = 0; insn; insn = NEXT_INSN (insn)) + length += get_attr_length (insn); + + return length; +} + + +/* Implement `INCOMING_RETURN_ADDR_RTX'. */ + +rtx +avr_incoming_return_addr_rtx (void) +{ + /* The return address is at the top of the stack. Note that the push + was via post-decrement, which means the actual address is off by one. */ + return gen_frame_mem (HImode, plus_constant (Pmode, stack_pointer_rtx, 1)); +} + +/* Helper for expand_prologue. Emit a push of a byte register. */ + +static void +emit_push_byte (unsigned regno, bool frame_related_p) +{ + rtx mem, reg, insn; + + mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + + cfun->machine->stack_usage++; +} + + +/* Helper for expand_prologue. Emit a push of a SFR via tmp_reg. + SFR is a MEM representing the memory location of the SFR. + If CLR_P then clear the SFR after the push using zero_reg. 
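emit_push_byte above models the AVR PUSH instruction: the byte is stored through the current stack pointer and SP is then decremented, which is why avr_incoming_return_addr_rtx reads the return address at SP + 1. A toy model in plain C; the 64-byte "RAM" and the starting SP value are invented for the illustration.

  #include <assert.h>

  /* Toy model of the AVR push convention: store through SP, then decrement. */
  static unsigned char mem[64];
  static unsigned int sp = 63;

  static void
  push (unsigned char b)
  {
    mem[sp--] = b;
  }

  int
  main (void)
  {
    push (0xAB);                    /* e.g. one byte of a return address */
    assert (mem[sp + 1] == 0xAB);   /* the last pushed byte sits at SP + 1 */
    return 0;
  }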
*/ + +static void +emit_push_sfr (rtx sfr, bool frame_related_p, bool clr_p) +{ + rtx insn; + + gcc_assert (MEM_P (sfr)); + + /* IN __tmp_reg__, IO(SFR) */ + insn = emit_move_insn (tmp_reg_rtx, sfr); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + + /* PUSH __tmp_reg__ */ + emit_push_byte (TMP_REGNO, frame_related_p); + + if (clr_p) + { + /* OUT IO(SFR), __zero_reg__ */ + insn = emit_move_insn (sfr, const0_rtx); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +static void +avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set) +{ + rtx insn; + bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + int live_seq = sequent_regs_live (); + + HOST_WIDE_INT size_max + = (HOST_WIDE_INT) GET_MODE_MASK (AVR_HAVE_8BIT_SP ? QImode : Pmode); + + bool minimize = (TARGET_CALL_PROLOGUES + && size < size_max + && live_seq + && !isr_p + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main); + + if (minimize + && (frame_pointer_needed + || avr_outgoing_args_size() > 8 + || (AVR_2_BYTE_PC && live_seq > 6) + || live_seq > 7)) + { + rtx pattern; + int first_reg, reg, offset; + + emit_move_insn (gen_rtx_REG (HImode, REG_X), + gen_int_mode (size, HImode)); + + pattern = gen_call_prologue_saves (gen_int_mode (live_seq, HImode), + gen_int_mode (live_seq+size, HImode)); + insn = emit_insn (pattern); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Describe the effect of the unspec_volatile call to prologue_saves. + Note that this formulation assumes that add_reg_note pushes the + notes to the front. Thus we build them in the reverse order of + how we want dwarf2out to process them. */ + + /* The function does always set frame_pointer_rtx, but whether that + is going to be permanent in the function is frame_pointer_needed. */ + + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, (frame_pointer_needed + ? frame_pointer_rtx + : stack_pointer_rtx), + plus_constant (Pmode, stack_pointer_rtx, + -(size + live_seq)))); + + /* Note that live_seq always contains r28+r29, but the other + registers to be saved are all below 18. */ + + first_reg = 18 - (live_seq - 2); + + for (reg = 29, offset = -live_seq + 1; + reg >= first_reg; + reg = (reg == 28 ? 17 : reg - 1), ++offset) + { + rtx m, r; + + m = gen_rtx_MEM (QImode, plus_constant (Pmode, stack_pointer_rtx, + offset)); + r = gen_rtx_REG (QImode, reg); + add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r)); + } + + cfun->machine->stack_usage += size + live_seq; + } + else /* !minimize */ + { + int reg; + + for (reg = 0; reg < 32; ++reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_push_byte (reg, true); + + if (frame_pointer_needed + && (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main))) + { + /* Push frame pointer. Always be consistent about the + ordering of pushes -- epilogue_restores expects the + register pair to be pushed low byte first. */ + + emit_push_byte (REG_Y, true); + emit_push_byte (REG_Y + 1, true); + } + + if (frame_pointer_needed + && size == 0) + { + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (size != 0) + { + /* Creating a frame can be done by direct manipulation of the + stack or via the frame pointer. These two methods are: + fp = sp + fp -= size + sp = fp + or + sp -= size + fp = sp (*) + the optimum method depends on function type, stack and + frame size. To avoid a complex logic, both methods are + tested and shortest is selected. 
+ + There is also the case where SIZE != 0 and no frame pointer is + needed; this can occur if ACCUMULATE_OUTGOING_ARGS is on. + In that case, insn (*) is not needed in that case. + We use the X register as scratch. This is save because in X + is call-clobbered. + In an interrupt routine, the case of SIZE != 0 together with + !frame_pointer_needed can only occur if the function is not a + leaf function and thus X has already been saved. */ + + int irq_state = -1; + HOST_WIDE_INT size_cfa = size, neg_size; + rtx fp_plus_insns, fp, my_fp; + + gcc_assert (frame_pointer_needed + || !isr_p + || !crtl->is_leaf); + + fp = my_fp = (frame_pointer_needed + ? frame_pointer_rtx + : gen_rtx_REG (Pmode, REG_X)); + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) does not change: + Prefer SUBI (1 cycle) over SBIW (2 cycles, same size). */ + + my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; + } + + /* Cut down size and avoid size = 0 so that we don't run + into ICE like PR52488 in the remainder. */ + + if (size > size_max) + { + /* Don't error so that insane code from newlib still compiles + and does not break building newlib. As PR51345 is implemented + now, there are multilib variants with -msp8. + + If user wants sanity checks he can use -Wstack-usage= + or similar options. + + For CFA we emit the original, non-saturated size so that + the generic machinery is aware of the real stack usage and + will print the above diagnostic as expected. */ + + size = size_max; + } + + size = trunc_int_for_mode (size, GET_MODE (my_fp)); + neg_size = trunc_int_for_mode (-size, GET_MODE (my_fp)); + + /************ Method 1: Adjust frame pointer ************/ + + start_sequence (); + + /* Normally, the dwarf2out frame-related-expr interpreter does + not expect to have the CFA change once the frame pointer is + set up. Thus, we avoid marking the move insn below and + instead indicate that the entire operation is complete after + the frame pointer subtraction is done. */ + + insn = emit_move_insn (fp, stack_pointer_rtx); + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); + } + + insn = emit_move_insn (my_fp, plus_constant (GET_MODE (my_fp), + my_fp, neg_size)); + + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, + plus_constant (Pmode, fp, + -size_cfa))); + } + + /* Copy to stack pointer. Note that since we've already + changed the CFA to the frame pointer this operation + need not be annotated if frame pointer is needed. + Always move through unspec, see PR50063. + For meaning of irq_state see movhi_sp_r insn. */ + + if (cfun->machine->is_interrupt) + irq_state = 1; + + if (TARGET_NO_INTERRUPTS + || cfun->machine->is_signal + || cfun->machine->is_OS_main) + irq_state = 0; + + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + + insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx, + fp, GEN_INT (irq_state))); + if (!frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -size_cfa))); + } + + fp_plus_insns = get_insns (); + end_sequence (); + + /************ Method 2: Adjust Stack pointer ************/ + + /* Stack adjustment by means of RCALL . and/or PUSH __TMP_REG__ + can only handle specific offsets. 
*/ + + if (avr_sp_immediate_operand (gen_int_mode (-size, HImode), HImode)) + { + rtx sp_plus_insns; + + start_sequence (); + + insn = emit_move_insn (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -size)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -size_cfa))); + if (frame_pointer_needed) + { + insn = emit_move_insn (fp, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + sp_plus_insns = get_insns (); + end_sequence (); + + /************ Use shortest method ************/ + + emit_insn (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns) + ? sp_plus_insns + : fp_plus_insns); + } + else + { + emit_insn (fp_plus_insns); + } + + cfun->machine->stack_usage += size_cfa; + } /* !minimize && size != 0 */ + } /* !minimize */ +} + + +/* Output function prologue. */ + +void +avr_expand_prologue (void) +{ + HARD_REG_SET set; + HOST_WIDE_INT size; + + size = get_frame_size() + avr_outgoing_args_size(); + + cfun->machine->stack_usage = 0; + + /* Prologue: naked. */ + if (cfun->machine->is_naked) + { + return; + } + + avr_regs_to_save (&set); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + /* Enable interrupts. */ + if (cfun->machine->is_interrupt) + emit_insn (gen_enable_interrupt ()); + + /* Push zero reg. */ + emit_push_byte (ZERO_REGNO, true); + + /* Push tmp reg. */ + emit_push_byte (TMP_REGNO, true); + + /* Push SREG. */ + /* ??? There's no dwarf2 column reserved for SREG. */ + emit_push_sfr (sreg_rtx, false, false /* clr */); + + /* Clear zero reg. */ + emit_move_insn (zero_reg_rtx, const0_rtx); + + /* Prevent any attempt to delete the setting of ZERO_REG! */ + emit_use (zero_reg_rtx); + + /* Push and clear RAMPD/X/Y/Z if present and low-part register is used. + ??? There are no dwarf2 columns reserved for RAMPD/X/Y/Z. */ + + if (AVR_HAVE_RAMPD) + emit_push_sfr (rampd_rtx, false /* frame-related */, true /* clr */); + + if (AVR_HAVE_RAMPX + && TEST_HARD_REG_BIT (set, REG_X) + && TEST_HARD_REG_BIT (set, REG_X + 1)) + { + emit_push_sfr (rampx_rtx, false /* frame-related */, true /* clr */); + } + + if (AVR_HAVE_RAMPY + && (frame_pointer_needed + || (TEST_HARD_REG_BIT (set, REG_Y) + && TEST_HARD_REG_BIT (set, REG_Y + 1)))) + { + emit_push_sfr (rampy_rtx, false /* frame-related */, true /* clr */); + } + + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_push_sfr (rampz_rtx, false /* frame-related */, AVR_HAVE_RAMPD); + } + } /* is_interrupt is_signal */ + + avr_prologue_setup_frame (size, set); + + if (flag_stack_usage_info) + current_function_static_stack_size = cfun->machine->stack_usage; +} + + +/* Implement `TARGET_ASM_FUNCTION_END_PROLOGUE'. */ +/* Output summary at end of function prologue. 
*/ + +static void +avr_asm_function_end_prologue (FILE *file) +{ + if (cfun->machine->is_naked) + { + fputs ("/* prologue: naked */\n", file); + } + else + { + if (cfun->machine->is_interrupt) + { + fputs ("/* prologue: Interrupt */\n", file); + } + else if (cfun->machine->is_signal) + { + fputs ("/* prologue: Signal */\n", file); + } + else + fputs ("/* prologue: function */\n", file); + } + + if (ACCUMULATE_OUTGOING_ARGS) + fprintf (file, "/* outgoing args size = %d */\n", + avr_outgoing_args_size()); + + fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n", + get_frame_size()); + fprintf (file, "/* stack size = %d */\n", + cfun->machine->stack_usage); + /* Create symbol stack offset here so all functions have it. Add 1 to stack + usage for offset so that SP + .L__stack_offset = return address. */ + fprintf (file, ".L__stack_usage = %d\n", cfun->machine->stack_usage); +} + + +/* Implement `EPILOGUE_USES'. */ + +int +avr_epilogue_uses (int regno ATTRIBUTE_UNUSED) +{ + if (reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal)) + return 1; + return 0; +} + +/* Helper for avr_expand_epilogue. Emit a pop of a byte register. */ + +static void +emit_pop_byte (unsigned regno) +{ + rtx mem, reg; + + mem = gen_rtx_PRE_INC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +} + +/* Output RTL epilogue. */ + +void +avr_expand_epilogue (bool sibcall_p) +{ + int reg; + int live_seq; + HARD_REG_SET set; + int minimize; + HOST_WIDE_INT size; + bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + + size = get_frame_size() + avr_outgoing_args_size(); + + /* epilogue: naked */ + if (cfun->machine->is_naked) + { + gcc_assert (!sibcall_p); + + emit_jump_insn (gen_return ()); + return; + } + + avr_regs_to_save (&set); + live_seq = sequent_regs_live (); + + minimize = (TARGET_CALL_PROLOGUES + && live_seq + && !isr_p + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main); + + if (minimize + && (live_seq > 4 + || frame_pointer_needed + || size)) + { + /* Get rid of frame. */ + + if (!frame_pointer_needed) + { + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + } + + if (size) + { + emit_move_insn (frame_pointer_rtx, + plus_constant (Pmode, frame_pointer_rtx, size)); + } + + emit_insn (gen_epilogue_restores (gen_int_mode (live_seq, HImode))); + return; + } + + if (size) + { + /* Try two methods to adjust stack and select shortest. */ + + int irq_state = -1; + rtx fp, my_fp; + rtx fp_plus_insns; + HOST_WIDE_INT size_max; + + gcc_assert (frame_pointer_needed + || !isr_p + || !crtl->is_leaf); + + fp = my_fp = (frame_pointer_needed + ? frame_pointer_rtx + : gen_rtx_REG (Pmode, REG_X)); + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) does not change: + Prefer SUBI (1 cycle) over SBIW (2 cycles). */ + + my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; + } + + /* For rationale see comment in prologue generation. */ + + size_max = (HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (my_fp)); + if (size > size_max) + size = size_max; + size = trunc_int_for_mode (size, GET_MODE (my_fp)); + + /********** Method 1: Adjust fp register **********/ + + start_sequence (); + + if (!frame_pointer_needed) + emit_move_insn (fp, stack_pointer_rtx); + + emit_move_insn (my_fp, plus_constant (GET_MODE (my_fp), my_fp, size)); + + /* Copy to stack pointer. 
*/ + + if (TARGET_NO_INTERRUPTS) + irq_state = 0; + + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + + emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, + GEN_INT (irq_state))); + + fp_plus_insns = get_insns (); + end_sequence (); + + /********** Method 2: Adjust Stack pointer **********/ + + if (avr_sp_immediate_operand (gen_int_mode (size, HImode), HImode)) + { + rtx sp_plus_insns; + + start_sequence (); + + emit_move_insn (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, size)); + + sp_plus_insns = get_insns (); + end_sequence (); + + /************ Use shortest method ************/ + + emit_insn (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns) + ? sp_plus_insns + : fp_plus_insns); + } + else + emit_insn (fp_plus_insns); + } /* size != 0 */ + + if (frame_pointer_needed + && !(cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + { + /* Restore previous frame_pointer. See avr_expand_prologue for + rationale for not using pophi. */ + + emit_pop_byte (REG_Y + 1); + emit_pop_byte (REG_Y); + } + + /* Restore used registers. */ + + for (reg = 31; reg >= 0; --reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_pop_byte (reg); + + if (isr_p) + { + /* Restore RAMPZ/Y/X/D using tmp_reg as scratch. + The conditions to restore them must be tha same as in prologue. */ + + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampz_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPY + && (frame_pointer_needed + || (TEST_HARD_REG_BIT (set, REG_Y) + && TEST_HARD_REG_BIT (set, REG_Y + 1)))) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampy_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPX + && TEST_HARD_REG_BIT (set, REG_X) + && TEST_HARD_REG_BIT (set, REG_X + 1)) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampx_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPD) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampd_rtx, tmp_reg_rtx); + } + + /* Restore SREG using tmp_reg as scratch. */ + + emit_pop_byte (TMP_REGNO); + emit_move_insn (sreg_rtx, tmp_reg_rtx); + + /* Restore tmp REG. */ + emit_pop_byte (TMP_REGNO); + + /* Restore zero REG. */ + emit_pop_byte (ZERO_REGNO); + } + + if (!sibcall_p) + emit_jump_insn (gen_return ()); +} + + +/* Implement `TARGET_ASM_FUNCTION_BEGIN_EPILOGUE'. */ + +static void +avr_asm_function_begin_epilogue (FILE *file) +{ + fprintf (file, "/* epilogue start */\n"); +} + + +/* Implement `TARGET_CANNOT_MODITY_JUMPS_P'. */ + +static bool +avr_cannot_modify_jumps_p (void) +{ + + /* Naked Functions must not have any instructions after + their epilogue, see PR42240 */ + + if (reload_completed + && cfun->machine + && cfun->machine->is_naked) + { + return true; + } + + return false; +} + + +/* Implement `TARGET_MODE_DEPENDENT_ADDRESS_P'. */ + +static bool +avr_mode_dependent_address_p (const_rtx addr ATTRIBUTE_UNUSED, addr_space_t as) +{ + /* FIXME: Non-generic addresses are not mode-dependent in themselves. + This hook just serves to hack around PR rtl-optimization/52543 by + claiming that non-generic addresses were mode-dependent so that + lower-subreg.c will skip these addresses. lower-subreg.c sets up fake + RTXes to probe SET and MEM costs and assumes that MEM is always in the + generic address space which is not true. */ + + return !ADDR_SPACE_GENERIC_P (as); +} + + +/* Helper function for `avr_legitimate_address_p'. 
*/ + +static inline bool +avr_reg_ok_for_addr_p (rtx reg, addr_space_t as, + RTX_CODE outer_code, bool strict) +{ + return (REG_P (reg) + && (avr_regno_mode_code_ok_for_base_p (REGNO (reg), QImode, + as, outer_code, UNKNOWN) + || (!strict + && REGNO (reg) >= FIRST_PSEUDO_REGISTER))); +} + + +/* Return nonzero if X (an RTX) is a legitimate memory address on the target + machine for a memory operand of mode MODE. */ + +static bool +avr_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + bool ok = CONSTANT_ADDRESS_P (x); + + switch (GET_CODE (x)) + { + case REG: + ok = avr_reg_ok_for_addr_p (x, ADDR_SPACE_GENERIC, + MEM, strict); + + if (strict + && GET_MODE_SIZE (mode) > 4 + && REG_X == REGNO (x)) + { + ok = false; + } + break; + + case POST_INC: + case PRE_DEC: + ok = avr_reg_ok_for_addr_p (XEXP (x, 0), ADDR_SPACE_GENERIC, + GET_CODE (x), strict); + break; + + case PLUS: + { + rtx reg = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (REG_P (reg) + && CONST_INT_P (op1) + && INTVAL (op1) >= 0) + { + bool fit = IN_RANGE (INTVAL (op1), 0, MAX_LD_OFFSET (mode)); + + if (fit) + { + ok = (! strict + || avr_reg_ok_for_addr_p (reg, ADDR_SPACE_GENERIC, + PLUS, strict)); + + if (reg == frame_pointer_rtx + || reg == arg_pointer_rtx) + { + ok = true; + } + } + else if (frame_pointer_needed + && reg == frame_pointer_rtx) + { + ok = true; + } + } + } + break; + + default: + break; + } + + if (avr_log.legitimate_address_p) + { + avr_edump ("\n%?: ret=%d, mode=%m strict=%d " + "reload_completed=%d reload_in_progress=%d %s:", + ok, mode, strict, reload_completed, reload_in_progress, + reg_renumber ? "(reg_renumber)" : ""); + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && IN_RANGE (INTVAL (XEXP (x, 1)), 0, MAX_LD_OFFSET (mode)) + && reg_renumber) + { + avr_edump ("(r%d ---> r%d)", REGNO (XEXP (x, 0)), + true_regnum (XEXP (x, 0))); + } + + avr_edump ("\n%r\n", x); + } + + return ok; +} + + +/* Former implementation of TARGET_LEGITIMIZE_ADDRESS, + now only a helper for avr_addr_space_legitimize_address. */ +/* Attempts to replace X with a valid + memory address for an operand of mode MODE */ + +static rtx +avr_legitimize_address (rtx x, rtx oldx, enum machine_mode mode) +{ + bool big_offset_p = false; + + x = oldx; + + if (GET_CODE (oldx) == PLUS + && REG_P (XEXP (oldx, 0))) + { + if (REG_P (XEXP (oldx, 1))) + x = force_reg (GET_MODE (oldx), oldx); + else if (CONST_INT_P (XEXP (oldx, 1))) + { + int offs = INTVAL (XEXP (oldx, 1)); + if (frame_pointer_rtx != XEXP (oldx, 0) + && offs > MAX_LD_OFFSET (mode)) + { + big_offset_p = true; + x = force_reg (GET_MODE (oldx), oldx); + } + } + } + + if (avr_log.legitimize_address) + { + avr_edump ("\n%?: mode=%m\n %r\n", mode, oldx); + + if (x != oldx) + avr_edump (" %s --> %r\n", big_offset_p ? "(big offset)" : "", x); + } + + return x; +} + + +/* Implement `LEGITIMIZE_RELOAD_ADDRESS'. */ +/* This will allow register R26/27 to be used where it is no worse than normal + base pointers R28/29 or R30/31. For example, if base offset is greater + than 63 bytes or for R++ or --R addressing. 
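+
+   For illustration (sequences as emitted by out_movqi_r_mr below): an
+   access through Y with an offset above 63 already needs the frame
+   pointer to be adjusted and restored around the load,
+
+       subi r28,lo8(-(off))
+       sbci r29,hi8(-(off))
+       ld   rD,Y
+       subi r28,lo8(off)
+       sbci r29,hi8(off)
+
+   so using X, which likewise needs an adjust/restore pair, is not any
+   more expensive in that case.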
*/ + +rtx +avr_legitimize_reload_address (rtx *px, enum machine_mode mode, + int opnum, int type, int addr_type, + int ind_levels ATTRIBUTE_UNUSED, + rtx (*mk_memloc)(rtx,int)) +{ + rtx x = *px; + + if (avr_log.legitimize_reload_address) + avr_edump ("\n%?:%m %r\n", mode, x); + + if (1 && (GET_CODE (x) == POST_INC + || GET_CODE (x) == PRE_DEC)) + { + push_reload (XEXP (x, 0), XEXP (x, 0), &XEXP (x, 0), &XEXP (x, 0), + POINTER_REGS, GET_MODE (x), GET_MODE (x), 0, 0, + opnum, RELOAD_OTHER); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.1 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, XEXP (x, 0), XEXP (x, 0)); + + return x; + } + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && 0 == reg_equiv_constant (REGNO (XEXP (x, 0))) + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) >= 1) + { + bool fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode); + + if (fit) + { + if (reg_equiv_address (REGNO (XEXP (x, 0))) != 0) + { + int regno = REGNO (XEXP (x, 0)); + rtx mem = mk_memloc (x, regno); + + push_reload (XEXP (mem, 0), NULL_RTX, &XEXP (mem, 0), NULL, + POINTER_REGS, Pmode, VOIDmode, 0, 0, + 1, (enum reload_type) addr_type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, XEXP (mem, 0), NULL_RTX); + + push_reload (mem, NULL_RTX, &XEXP (x, 0), NULL, + BASE_POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n", + BASE_POINTER_REGS, mem, NULL_RTX); + + return x; + } + } + else if (! (frame_pointer_needed + && XEXP (x, 0) == frame_pointer_rtx)) + { + push_reload (x, NULL_RTX, px, NULL, + POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.3 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, x, NULL_RTX); + + return x; + } + } + + return NULL_RTX; +} + + +/* Implement `TARGET_SECONDARY_RELOAD' */ + +static reg_class_t +avr_secondary_reload (bool in_p, rtx x, + reg_class_t reload_class ATTRIBUTE_UNUSED, + enum machine_mode mode, secondary_reload_info *sri) +{ + if (in_p + && MEM_P (x) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) + && ADDR_SPACE_MEMX != MEM_ADDR_SPACE (x)) + { + /* For the non-generic 16-bit spaces we need a d-class scratch. */ + + switch (mode) + { + default: + gcc_unreachable(); + + case QImode: sri->icode = CODE_FOR_reload_inqi; break; + case QQmode: sri->icode = CODE_FOR_reload_inqq; break; + case UQQmode: sri->icode = CODE_FOR_reload_inuqq; break; + + case HImode: sri->icode = CODE_FOR_reload_inhi; break; + case HQmode: sri->icode = CODE_FOR_reload_inhq; break; + case HAmode: sri->icode = CODE_FOR_reload_inha; break; + case UHQmode: sri->icode = CODE_FOR_reload_inuhq; break; + case UHAmode: sri->icode = CODE_FOR_reload_inuha; break; + + case PSImode: sri->icode = CODE_FOR_reload_inpsi; break; + + case SImode: sri->icode = CODE_FOR_reload_insi; break; + case SFmode: sri->icode = CODE_FOR_reload_insf; break; + case SQmode: sri->icode = CODE_FOR_reload_insq; break; + case SAmode: sri->icode = CODE_FOR_reload_insa; break; + case USQmode: sri->icode = CODE_FOR_reload_inusq; break; + case USAmode: sri->icode = CODE_FOR_reload_inusa; break; + } + } + + return NO_REGS; +} + + +/* Helper function to print assembler resp. track instruction + sequence lengths. Always return "". + + If PLEN == NULL: + Output assembler code from template TPL with operands supplied + by OPERANDS. 
This is just forwarding to output_asm_insn. + + If PLEN != NULL: + If N_WORDS >= 0 Add N_WORDS to *PLEN. + If N_WORDS < 0 Set *PLEN to -N_WORDS. + Don't output anything. +*/ + +static const char* +avr_asm_len (const char* tpl, rtx* operands, int* plen, int n_words) +{ + if (NULL == plen) + { + output_asm_insn (tpl, operands); + } + else + { + if (n_words < 0) + *plen = -n_words; + else + *plen += n_words; + } + + return ""; +} + + +/* Return a pointer register name as a string. */ + +static const char* +ptrreg_to_str (int regno) +{ + switch (regno) + { + case REG_X: return "X"; + case REG_Y: return "Y"; + case REG_Z: return "Z"; + default: + output_operand_lossage ("address operand requires constraint for" + " X, Y, or Z register"); + } + return NULL; +} + +/* Return the condition name as a string. + Used in conditional jump constructing */ + +static const char* +cond_string (enum rtx_code code) +{ + switch (code) + { + case NE: + return "ne"; + case EQ: + return "eq"; + case GE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "pl"; + else + return "ge"; + case LT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "mi"; + else + return "lt"; + case GEU: + return "sh"; + case LTU: + return "lo"; + default: + gcc_unreachable (); + } + + return ""; +} + + +/* Implement `TARGET_PRINT_OPERAND_ADDRESS'. */ +/* Output ADDR to FILE as address. */ + +static void +avr_print_operand_address (FILE *file, rtx addr) +{ + switch (GET_CODE (addr)) + { + case REG: + fprintf (file, ptrreg_to_str (REGNO (addr))); + break; + + case PRE_DEC: + fprintf (file, "-%s", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + case POST_INC: + fprintf (file, "%s+", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + default: + if (CONSTANT_ADDRESS_P (addr) + && text_segment_operand (addr, VOIDmode)) + { + rtx x = addr; + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT) + { + /* Assembler gs() will implant word address. Make offset + a byte offset inside gs() for assembler. This is + needed because the more logical (constant+gs(sym)) is not + accepted by gas. For 128K and smaller devices this is ok. + For large devices it will create a trampoline to offset + from symbol which may not be what the user really wanted. */ + + fprintf (file, "gs("); + output_addr_const (file, XEXP (x,0)); + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC ")", + 2 * INTVAL (XEXP (x, 1))); + if (AVR_3_BYTE_PC) + if (warning (0, "pointer offset from symbol maybe incorrect")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + } + else + { + fprintf (file, "gs("); + output_addr_const (file, addr); + fprintf (file, ")"); + } + } + else + output_addr_const (file, addr); + } +} + + +/* Implement `TARGET_PRINT_OPERAND_PUNCT_VALID_P'. */ + +static bool +avr_print_operand_punct_valid_p (unsigned char code) +{ + return code == '~' || code == '!'; +} + + +/* Implement `TARGET_PRINT_OPERAND'. */ +/* Output X as assembler operand to file FILE. + For a description of supported %-codes, see top of avr.md. 
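+
+   As a quick orientation (the handlers are in the function below):
+   '~' prints an 'r' prefix when the device has no JMP/CALL, '!' prints an
+   'e' prefix for EIJMP/EICALL, 'A'..'D' select the bytes of a multi-byte
+   operand, 't'/'T' address a single bit, 'i' prints an I/O address, 'x' a
+   program-memory address, and 'j'/'k' a branch condition resp. its
+   reverse.  For example, an illustrative template such as "%~call %x0"
+   would assemble to "rcall" on a device without CALL and to "call"
+   otherwise (see avr.md for the real templates).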
*/ + +static void +avr_print_operand (FILE *file, rtx x, int code) +{ + int abcd = 0; + + if (code >= 'A' && code <= 'D') + abcd = code - 'A'; + + if (code == '~') + { + if (!AVR_HAVE_JMP_CALL) + fputc ('r', file); + } + else if (code == '!') + { + if (AVR_HAVE_EIJMP_EICALL) + fputc ('e', file); + } + else if (code == 't' + || code == 'T') + { + static int t_regno = -1; + static int t_nbits = -1; + + if (REG_P (x) && t_regno < 0 && code == 'T') + { + t_regno = REGNO (x); + t_nbits = GET_MODE_BITSIZE (GET_MODE (x)); + } + else if (CONST_INT_P (x) && t_regno >= 0 + && IN_RANGE (INTVAL (x), 0, t_nbits - 1)) + { + int bpos = INTVAL (x); + + fprintf (file, "%s", reg_names[t_regno + bpos / 8]); + if (code == 'T') + fprintf (file, ",%d", bpos % 8); + + t_regno = -1; + } + else + fatal_insn ("operands to %T/%t must be reg + const_int:", x); + } + else if (REG_P (x)) + { + if (x == zero_reg_rtx) + fprintf (file, "__zero_reg__"); + else if (code == 'r' && REGNO (x) < 32) + fprintf (file, "%d", (int) REGNO (x)); + else + fprintf (file, reg_names[REGNO (x) + abcd]); + } + else if (CONST_INT_P (x)) + { + HOST_WIDE_INT ival = INTVAL (x); + + if ('i' != code) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival + abcd); + else if (low_io_address_operand (x, VOIDmode) + || high_io_address_operand (x, VOIDmode)) + { + if (AVR_HAVE_RAMPZ && ival == avr_addr.rampz) + fprintf (file, "__RAMPZ__"); + else if (AVR_HAVE_RAMPY && ival == avr_addr.rampy) + fprintf (file, "__RAMPY__"); + else if (AVR_HAVE_RAMPX && ival == avr_addr.rampx) + fprintf (file, "__RAMPX__"); + else if (AVR_HAVE_RAMPD && ival == avr_addr.rampd) + fprintf (file, "__RAMPD__"); + else if (AVR_XMEGA && ival == avr_addr.ccp) + fprintf (file, "__CCP__"); + else if (ival == avr_addr.sreg) fprintf (file, "__SREG__"); + else if (ival == avr_addr.sp_l) fprintf (file, "__SP_L__"); + else if (ival == avr_addr.sp_h) fprintf (file, "__SP_H__"); + else + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + ival - avr_current_arch->sfr_offset); + } + } + else + fatal_insn ("bad address, not an I/O address:", x); + } + else if (MEM_P (x)) + { + rtx addr = XEXP (x, 0); + + if (code == 'm') + { + if (!CONSTANT_P (addr)) + fatal_insn ("bad address, not a constant:", addr); + /* Assembler template with m-code is data - not progmem section */ + if (text_segment_operand (addr, VOIDmode)) + if (warning (0, "accessing data memory with" + " program memory address")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + output_addr_const (file, addr); + } + else if (code == 'i') + { + avr_print_operand (file, addr, 'i'); + } + else if (code == 'o') + { + if (GET_CODE (addr) != PLUS) + fatal_insn ("bad address, not (reg+disp):", addr); + + avr_print_operand (file, XEXP (addr, 1), 0); + } + else if (code == 'p' || code == 'r') + { + if (GET_CODE (addr) != POST_INC && GET_CODE (addr) != PRE_DEC) + fatal_insn ("bad address, not post_inc or pre_dec:", addr); + + if (code == 'p') + avr_print_operand_address (file, XEXP (addr, 0)); /* X, Y, Z */ + else + avr_print_operand (file, XEXP (addr, 0), 0); /* r26, r28, r30 */ + } + else if (GET_CODE (addr) == PLUS) + { + avr_print_operand_address (file, XEXP (addr,0)); + if (REGNO (XEXP (addr, 0)) == REG_X) + fatal_insn ("internal compiler error. 
Bad address:" + ,addr); + fputc ('+', file); + avr_print_operand (file, XEXP (addr,1), code); + } + else + avr_print_operand_address (file, addr); + } + else if (code == 'i') + { + fatal_insn ("bad address, not an I/O address:", x); + } + else if (code == 'x') + { + /* Constant progmem address - like used in jmp or call */ + if (0 == text_segment_operand (x, VOIDmode)) + if (warning (0, "accessing program memory" + " with data memory address")) + { + output_addr_const (stderr, x); + fprintf(stderr,"\n"); + } + /* Use normal symbol for direct address no linker trampoline needed */ + output_addr_const (file, x); + } + else if (CONST_FIXED_P (x)) + { + HOST_WIDE_INT ival = INTVAL (avr_to_int_mode (x)); + if (code != 0) + output_operand_lossage ("Unsupported code '%c' for fixed-point:", + code); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); + } + else if (GET_CODE (x) == CONST_DOUBLE) + { + long val; + REAL_VALUE_TYPE rv; + if (GET_MODE (x) != SFmode) + fatal_insn ("internal compiler error. Unknown mode:", x); + REAL_VALUE_FROM_CONST_DOUBLE (rv, x); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + fprintf (file, "0x%lx", val); + } + else if (GET_CODE (x) == CONST_STRING) + fputs (XSTR (x, 0), file); + else if (code == 'j') + fputs (cond_string (GET_CODE (x)), file); + else if (code == 'k') + fputs (cond_string (reverse_condition (GET_CODE (x))), file); + else + avr_print_operand_address (file, x); +} + + +/* Worker function for `NOTICE_UPDATE_CC'. */ +/* Update the condition code in the INSN. */ + +void +avr_notice_update_cc (rtx body ATTRIBUTE_UNUSED, rtx insn) +{ + rtx set; + enum attr_cc cc = get_attr_cc (insn); + + switch (cc) + { + default: + break; + + case CC_PLUS: + case CC_LDI: + { + rtx *op = recog_data.operand; + int len_dummy, icc; + + /* Extract insn's operands. */ + extract_constrain_insn_cached (insn); + + switch (cc) + { + default: + gcc_unreachable(); + + case CC_PLUS: + avr_out_plus (insn, op, &len_dummy, &icc); + cc = (enum attr_cc) icc; + break; + + case CC_LDI: + + cc = (op[1] == CONST0_RTX (GET_MODE (op[0])) + && reg_overlap_mentioned_p (op[0], zero_reg_rtx)) + /* Loading zero-reg with 0 uses CLR and thus clobbers cc0. */ + ? CC_CLOBBER + /* Any other "r,rL" combination does not alter cc0. */ + : CC_NONE; + + break; + } /* inner switch */ + + break; + } + } /* outer swicth */ + + switch (cc) + { + default: + /* Special values like CC_OUT_PLUS from above have been + mapped to "standard" CC_* values so we never come here. */ + + gcc_unreachable(); + break; + + case CC_NONE: + /* Insn does not affect CC at all. */ + break; + + case CC_SET_N: + CC_STATUS_INIT; + break; + + case CC_SET_ZN: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.flags |= CC_NO_OVERFLOW; + cc_status.value1 = SET_DEST (set); + } + break; + + case CC_SET_CZN: + /* Insn sets the Z,N,C flags of CC to recog_operand[0]. + The V flag may or may not be known but that's ok because + alter_cond will change tests to use EQ/NE. */ + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.value1 = SET_DEST (set); + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + } + break; + + case CC_COMPARE: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + cc_status.value1 = SET_SRC (set); + break; + + case CC_CLOBBER: + /* Insn doesn't leave CC in a usable state. */ + CC_STATUS_INIT; + break; + } +} + +/* Choose mode for jump insn: + 1 - relative jump in range -63 <= x <= 62 ; + 2 - relative jump in range -2046 <= x <= 2045 ; + 3 - absolute jump (only for ATmega[16]03). 
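+
+   The returned mode corresponds to the branch shapes used by
+   ret_cond_branch below, roughly:
+
+       1:  br<cond>  %0
+       2:  br<rev>   .+2
+           rjmp %0
+       3:  br<rev>   .+4
+           jmp  %0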
*/ + +int +avr_jump_mode (rtx x, rtx insn) +{ + int dest_addr = INSN_ADDRESSES (INSN_UID (GET_CODE (x) == LABEL_REF + ? XEXP (x, 0) : x)); + int cur_addr = INSN_ADDRESSES (INSN_UID (insn)); + int jump_distance = cur_addr - dest_addr; + + if (-63 <= jump_distance && jump_distance <= 62) + return 1; + else if (-2046 <= jump_distance && jump_distance <= 2045) + return 2; + else if (AVR_HAVE_JMP_CALL) + return 3; + + return 2; +} + +/* Return an AVR condition jump commands. + X is a comparison RTX. + LEN is a number returned by avr_jump_mode function. + If REVERSE nonzero then condition code in X must be reversed. */ + +const char* +ret_cond_branch (rtx x, int len, int reverse) +{ + RTX_CODE cond = reverse ? reverse_condition (GET_CODE (x)) : GET_CODE (x); + + switch (cond) + { + case GT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? ("breq .+2" CR_TAB + "brpl %0") : + len == 2 ? ("breq .+4" CR_TAB + "brmi .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brmi .+4" CR_TAB + "jmp %0")); + + else + return (len == 1 ? ("breq .+2" CR_TAB + "brge %0") : + len == 2 ? ("breq .+4" CR_TAB + "brlt .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brlt .+4" CR_TAB + "jmp %0")); + case GTU: + return (len == 1 ? ("breq .+2" CR_TAB + "brsh %0") : + len == 2 ? ("breq .+4" CR_TAB + "brlo .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brlo .+4" CR_TAB + "jmp %0")); + case LE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? ("breq %0" CR_TAB + "brmi %0") : + len == 2 ? ("breq .+2" CR_TAB + "brpl .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brpl .+4" CR_TAB + "jmp %0")); + else + return (len == 1 ? ("breq %0" CR_TAB + "brlt %0") : + len == 2 ? ("breq .+2" CR_TAB + "brge .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brge .+4" CR_TAB + "jmp %0")); + case LEU: + return (len == 1 ? ("breq %0" CR_TAB + "brlo %0") : + len == 2 ? ("breq .+2" CR_TAB + "brsh .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brsh .+4" CR_TAB + "jmp %0")); + default: + if (reverse) + { + switch (len) + { + case 1: + return "br%k1 %0"; + case 2: + return ("br%j1 .+2" CR_TAB + "rjmp %0"); + default: + return ("br%j1 .+4" CR_TAB + "jmp %0"); + } + } + else + { + switch (len) + { + case 1: + return "br%j1 %0"; + case 2: + return ("br%k1 .+2" CR_TAB + "rjmp %0"); + default: + return ("br%k1 .+4" CR_TAB + "jmp %0"); + } + } + } + return ""; +} + + +/* Worker function for `FINAL_PRESCAN_INSN'. */ +/* Output insn cost for next insn. */ + +void +avr_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED, + int num_operands ATTRIBUTE_UNUSED) +{ + if (avr_log.rtx_costs) + { + rtx set = single_set (insn); + + if (set) + fprintf (asm_out_file, "/* DEBUG: cost = %d. */\n", + set_src_cost (SET_SRC (set), optimize_insn_for_speed_p ())); + else + fprintf (asm_out_file, "/* DEBUG: pattern-cost = %d. */\n", + rtx_cost (PATTERN (insn), INSN, 0, + optimize_insn_for_speed_p())); + } +} + +/* Return 0 if undefined, 1 if always true or always false. */ + +int +avr_simplify_comparison_p (enum machine_mode mode, RTX_CODE op, rtx x) +{ + unsigned int max = (mode == QImode ? 0xff : + mode == HImode ? 0xffff : + mode == PSImode ? 0xffffff : + mode == SImode ? 0xffffffff : 0); + if (max && op && CONST_INT_P (x)) + { + if (unsigned_condition (op) != op) + max >>= 1; + + if (max != (INTVAL (x) & max) + && INTVAL (x) != 0xff) + return 1; + } + return 0; +} + + +/* Worker function for `FUNCTION_ARG_REGNO_P'. 
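+
+   Arguments are passed in r8..r25, allocated downwards in even-sized
+   chunks (see avr_function_arg and avr_num_arg_regs below).  Purely as an
+   illustration, and assuming FIRST_CUM_REG is 26, a prototype like
+
+       int f (int a, long b);
+
+   would get a in r24/r25 and b in r20..r23.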
*/ +/* Returns nonzero if REGNO is the number of a hard + register in which function arguments are sometimes passed. */ + +int +avr_function_arg_regno_p(int r) +{ + return (r >= 8 && r <= 25); +} + + +/* Worker function for `INIT_CUMULATIVE_ARGS'. */ +/* Initializing the variable cum for the state at the beginning + of the argument list. */ + +void +avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + cum->nregs = 18; + cum->regno = FIRST_CUM_REG; + if (!libname && stdarg_p (fntype)) + cum->nregs = 0; + + /* Assume the calle may be tail called */ + + cfun->machine->sibcall_fails = 0; +} + +/* Returns the number of registers to allocate for a function argument. */ + +static int +avr_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + /* Align all function arguments to start in even-numbered registers. + Odd-sized arguments leave holes above them. */ + + return (size + 1) & ~1; +} + + +/* Implement `TARGET_FUNCTION_ARG'. */ +/* Controls whether a function argument is passed + in a register, and which register. */ + +static rtx +avr_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = avr_num_arg_regs (mode, type); + + if (cum->nregs && bytes <= cum->nregs) + return gen_rtx_REG (mode, cum->regno - bytes); + + return NULL_RTX; +} + + +/* Implement `TARGET_FUNCTION_ARG_ADVANCE'. */ +/* Update the summarizer variable CUM to advance past an argument + in the argument list. */ + +static void +avr_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = avr_num_arg_regs (mode, type); + + cum->nregs -= bytes; + cum->regno -= bytes; + + /* A parameter is being passed in a call-saved register. As the original + contents of these regs has to be restored before leaving the function, + a function must not pass arguments in call-saved regs in order to get + tail-called. */ + + if (cum->regno >= 8 + && cum->nregs >= 0 + && !call_used_regs[cum->regno]) + { + /* FIXME: We ship info on failing tail-call in struct machine_function. + This uses internals of calls.c:expand_call() and the way args_so_far + is used. targetm.function_ok_for_sibcall() needs to be extended to + pass &args_so_far, too. At present, CUMULATIVE_ARGS is target + dependent so that such an extension is not wanted. */ + + cfun->machine->sibcall_fails = 1; + } + + /* Test if all registers needed by the ABI are actually available. If the + user has fixed a GPR needed to pass an argument, an (implicit) function + call will clobber that fixed register. See PR45099 for an example. */ + + if (cum->regno >= 8 + && cum->nregs >= 0) + { + int regno; + + for (regno = cum->regno; regno < cum->regno + bytes; regno++) + if (fixed_regs[regno]) + warning (0, "fixed register %s used to pass parameter to function", + reg_names[regno]); + } + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->regno = FIRST_CUM_REG; + } +} + +/* Implement `TARGET_FUNCTION_OK_FOR_SIBCALL' */ +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. 
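+
+   As an illustrative example of the checks below, with
+
+       void callee (void) __attribute__((OS_task));
+       void caller (void) { callee (); }
+
+   the call is not turned into a tail call, because caller and callee
+   would need different epilogues.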
*/ + +static bool +avr_function_ok_for_sibcall (tree decl_callee, tree exp_callee) +{ + tree fntype_callee; + + /* Tail-calling must fail if callee-saved regs are used to pass + function args. We must not tail-call when `epilogue_restores' + is used. Unfortunately, we cannot tell at this point if that + actually will happen or not, and we cannot step back from + tail-calling. Thus, we inhibit tail-calling with -mcall-prologues. */ + + if (cfun->machine->sibcall_fails + || TARGET_CALL_PROLOGUES) + { + return false; + } + + fntype_callee = TREE_TYPE (CALL_EXPR_FN (exp_callee)); + + if (decl_callee) + { + decl_callee = TREE_TYPE (decl_callee); + } + else + { + decl_callee = fntype_callee; + + while (FUNCTION_TYPE != TREE_CODE (decl_callee) + && METHOD_TYPE != TREE_CODE (decl_callee)) + { + decl_callee = TREE_TYPE (decl_callee); + } + } + + /* Ensure that caller and callee have compatible epilogues */ + + if (cfun->machine->is_interrupt + || cfun->machine->is_signal + || cfun->machine->is_naked + || avr_naked_function_p (decl_callee) + /* FIXME: For OS_task and OS_main, this might be over-conservative. */ + || (avr_OS_task_function_p (decl_callee) + != cfun->machine->is_OS_task) + || (avr_OS_main_function_p (decl_callee) + != cfun->machine->is_OS_main)) + { + return false; + } + + return true; +} + +/*********************************************************************** + Functions for outputting various mov's for a various modes +************************************************************************/ + +/* Return true if a value of mode MODE is read from flash by + __load_* function from libgcc. */ + +bool +avr_load_libgcc_p (rtx op) +{ + enum machine_mode mode = GET_MODE (op); + int n_bytes = GET_MODE_SIZE (mode); + + return (n_bytes > 2 + && !AVR_HAVE_LPMX + && avr_mem_flash_p (op)); +} + +/* Return true if a value of mode MODE is read by __xload_* function. */ + +bool +avr_xload_libgcc_p (enum machine_mode mode) +{ + int n_bytes = GET_MODE_SIZE (mode); + + return (n_bytes > 1 + || avr_current_device->n_flash > 1); +} + + +/* Fixme: This is a hack because secondary reloads don't works as expected. + + Find an unused d-register to be used as scratch in INSN. + EXCLUDE is either NULL_RTX or some register. In the case where EXCLUDE + is a register, skip all possible return values that overlap EXCLUDE. + The policy for the returned register is similar to that of + `reg_unused_after', i.e. the returned register may overlap the SET_DEST + of INSN. + + Return a QImode d-register or NULL_RTX if nothing found. */ + +static rtx +avr_find_unused_d_reg (rtx insn, rtx exclude) +{ + int regno; + bool isr_p = (avr_interrupt_function_p (current_function_decl) + || avr_signal_function_p (current_function_decl)); + + for (regno = 16; regno < 32; regno++) + { + rtx reg = all_regs_rtx[regno]; + + if ((exclude + && reg_overlap_mentioned_p (exclude, reg)) + || fixed_regs[regno]) + { + continue; + } + + /* Try non-live register */ + + if (!df_regs_ever_live_p (regno) + && (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main + || (!isr_p && call_used_regs[regno]))) + { + return reg; + } + + /* Any live register can be used if it is unused after. + Prologue/epilogue will care for it as needed. */ + + if (df_regs_ever_live_p (regno) + && reg_unused_after (insn, reg)) + { + return reg; + } + } + + return NULL_RTX; +} + + +/* Helper function for the next function in the case where only restricted + version of LPM instruction is available. 
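+
+   Plain LPM can only load r0 from Z and does not post-increment, so each
+   byte of a wider value costs an lpm / mov / adiw triple, roughly
+
+       lpm
+       mov  %A0,r0
+       adiw r30,1
+       lpm
+       mov  %B0,r0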
*/ + +static const char* +avr_out_lpm_no_lpmx (rtx insn, rtx *xop, int *plen) +{ + rtx dest = xop[0]; + rtx addr = xop[1]; + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int regno_dest; + + regno_dest = REGNO (dest); + + /* The implicit target register of LPM. */ + xop[3] = lpm_reg_rtx; + + switch (GET_CODE (addr)) + { + default: + gcc_unreachable(); + + case REG: + + gcc_assert (REG_Z == REGNO (addr)); + + switch (n_bytes) + { + default: + gcc_unreachable(); + + case 1: + avr_asm_len ("%4lpm", xop, plen, 1); + + if (regno_dest != LPM_REGNO) + avr_asm_len ("mov %0,%3", xop, plen, 1); + + return ""; + + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm" CR_TAB + "push %3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "pop %A0", xop, plen, 6); + + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3", xop, plen, 5); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + + break; /* 2 */ + } + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + if (regno_dest == LPM_REGNO) + avr_asm_len ("%4lpm" CR_TAB + "adiw %2,1", xop, plen, 2); + else + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 2) + avr_asm_len ("%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 3) + avr_asm_len ("%4lpm" CR_TAB + "mov %C0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 4) + avr_asm_len ("%4lpm" CR_TAB + "mov %D0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + return ""; +} + + +/* If PLEN == NULL: Ouput instructions to load a value from a memory location + OP[1] in AS1 to register OP[0]. + If PLEN != 0 set *PLEN to the length in words of the instruction sequence. + Return "". */ + +const char* +avr_out_lpm (rtx insn, rtx *op, int *plen) +{ + rtx xop[7]; + rtx dest = op[0]; + rtx src = SET_SRC (single_set (insn)); + rtx addr; + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int segment; + RTX_CODE code; + addr_space_t as = MEM_ADDR_SPACE (src); + + if (plen) + *plen = 0; + + if (MEM_P (dest)) + { + warning (0, "writing to address space %qs not supported", + avr_addrspace[MEM_ADDR_SPACE (dest)].name); + + return ""; + } + + addr = XEXP (src, 0); + code = GET_CODE (addr); + + gcc_assert (REG_P (dest)); + gcc_assert (REG == code || POST_INC == code); + + xop[0] = dest; + xop[1] = addr; + xop[2] = lpm_addr_reg_rtx; + xop[4] = xstring_empty; + xop[5] = tmp_reg_rtx; + xop[6] = XEXP (rampz_rtx, 0); + + segment = avr_addrspace[as].segment; + + /* Set RAMPZ as needed. */ + + if (segment) + { + xop[4] = GEN_INT (segment); + xop[3] = avr_find_unused_d_reg (insn, lpm_addr_reg_rtx); + + if (xop[3] != NULL_RTX) + { + avr_asm_len ("ldi %3,%4" CR_TAB + "out %i6,%3", xop, plen, 2); + } + else if (segment == 1) + { + avr_asm_len ("clr %5" CR_TAB + "inc %5" CR_TAB + "out %i6,%5", xop, plen, 3); + } + else + { + avr_asm_len ("mov %5,%2" CR_TAB + "ldi %2,%4" CR_TAB + "out %i6,%2" CR_TAB + "mov %2,%5", xop, plen, 4); + } + + xop[4] = xstring_e; + + if (!AVR_HAVE_ELPMX) + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + else if (!AVR_HAVE_LPMX) + { + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + + /* We have [E]LPMX: Output reading from Flash the comfortable way. 
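+
+   That is, the post-increment forms can be used directly; a 16-bit read
+   is simply
+
+       lpm  %A0,Z+
+       lpm  %B0,Z
+
+   (with "elpm" instead of "lpm" once a RAMPZ segment has been set up
+   above).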
*/ + + switch (GET_CODE (addr)) + { + default: + gcc_unreachable(); + + case REG: + + gcc_assert (REG_Z == REGNO (addr)); + + switch (n_bytes) + { + default: + gcc_unreachable(); + + case 1: + return avr_asm_len ("%4lpm %0,%a2", xop, plen, 1); + + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %B0,%a2" CR_TAB + "mov %A0,%5", xop, plen, 3); + else + { + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + } + + break; /* 2 */ + + case 3: + + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+" CR_TAB + "%4lpm %C0,%a2", xop, plen, 3); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,2", xop, plen, 1); + + break; /* 3 */ + + case 4: + + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+", xop, plen, 2); + + if (REGNO (dest) == REG_Z - 2) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %C0,%a2" CR_TAB + "mov %D0,%5", xop, plen, 3); + else + { + avr_asm_len ("%4lpm %C0,%a2+" CR_TAB + "%4lpm %D0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,3", xop, plen, 1); + } + + break; /* 4 */ + } /* n_bytes */ + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + avr_asm_len ("%4lpm %A0,%a2+", xop, plen, 1); + if (n_bytes >= 2) avr_asm_len ("%4lpm %B0,%a2+", xop, plen, 1); + if (n_bytes >= 3) avr_asm_len ("%4lpm %C0,%a2+", xop, plen, 1); + if (n_bytes >= 4) avr_asm_len ("%4lpm %D0,%a2+", xop, plen, 1); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + if (xop[4] == xstring_e && AVR_HAVE_RAMPD) + { + /* Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM. */ + + xop[0] = zero_reg_rtx; + avr_asm_len ("out %i6,%0", xop, plen, 1); + } + + return ""; +} + + +/* Worker function for xload_8 insn. */ + +const char* +avr_out_xload (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen) +{ + rtx xop[4]; + + xop[0] = op[0]; + xop[1] = op[1]; + xop[2] = lpm_addr_reg_rtx; + xop[3] = AVR_HAVE_LPMX ? op[0] : lpm_reg_rtx; + + avr_asm_len (AVR_HAVE_LPMX ? "lpm %3,%a2" : "lpm", xop, plen, -1); + + avr_asm_len ("sbrc %1,7" CR_TAB + "ld %3,%a2", xop, plen, 2); + + if (REGNO (xop[0]) != REGNO (xop[3])) + avr_asm_len ("mov %0,%3", xop, plen, 1); + + return ""; +} + + +const char* +output_movqi (rtx insn, rtx operands[], int *plen) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, operands, plen); + } + + gcc_assert (1 == GET_MODE_SIZE (GET_MODE (dest))); + + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + return avr_asm_len ("out %0,%1", operands, plen, -1); + else if (test_hard_reg_class (STACK_REG, src)) + return avr_asm_len ("in %0,%1", operands, plen, -1); + + return avr_asm_len ("mov %0,%1", operands, plen, -1); + } + else if (CONSTANT_P (src)) + { + output_reload_in_const (operands, NULL_RTX, plen, false); + return ""; + } + else if (MEM_P (src)) + return out_movqi_r_mr (insn, operands, plen); /* mov r,m */ + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? 
zero_reg_rtx : src; + + return out_movqi_mr_r (insn, xop, plen); + } + + return ""; +} + + +const char * +output_movhi (rtx insn, rtx xop[], int *plen) +{ + rtx dest = xop[0]; + rtx src = xop[1]; + + gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2); + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, xop, plen); + } + + gcc_assert (2 == GET_MODE_SIZE (GET_MODE (dest))); + + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + { + if (AVR_HAVE_8BIT_SP) + return avr_asm_len ("out __SP_L__,%A1", xop, plen, -1); + + if (AVR_XMEGA) + return avr_asm_len ("out __SP_L__,%A1" CR_TAB + "out __SP_H__,%B1", xop, plen, -2); + + /* Use simple load of SP if no interrupts are used. */ + + return TARGET_NO_INTERRUPTS + ? avr_asm_len ("out __SP_H__,%B1" CR_TAB + "out __SP_L__,%A1", xop, plen, -2) + : avr_asm_len ("in __tmp_reg__,__SREG__" CR_TAB + "cli" CR_TAB + "out __SP_H__,%B1" CR_TAB + "out __SREG__,__tmp_reg__" CR_TAB + "out __SP_L__,%A1", xop, plen, -5); + } + else if (test_hard_reg_class (STACK_REG, src)) + { + return !AVR_HAVE_SPH + ? avr_asm_len ("in %A0,__SP_L__" CR_TAB + "clr %B0", xop, plen, -2) + + : avr_asm_len ("in %A0,__SP_L__" CR_TAB + "in %B0,__SP_H__", xop, plen, -2); + } + + return AVR_HAVE_MOVW + ? avr_asm_len ("movw %0,%1", xop, plen, -1) + + : avr_asm_len ("mov %A0,%A1" CR_TAB + "mov %B0,%B1", xop, plen, -2); + } /* REG_P (src) */ + else if (CONSTANT_P (src)) + { + return output_reload_inhi (xop, NULL, plen); + } + else if (MEM_P (src)) + { + return out_movhi_r_mr (insn, xop, plen); /* mov r,m */ + } + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? zero_reg_rtx : src; + + return out_movhi_mr_r (insn, xop, plen); + } + + fatal_insn ("invalid insn:", insn); + + return ""; +} + +static const char* +out_movqi_r_mr (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (src, 0); + + if (CONSTANT_ADDRESS_P (x)) + { + return optimize > 0 && io_address_operand (x, QImode) + ? avr_asm_len ("in %0,%i1", op, plen, -1) + : avr_asm_len ("lds %0,%m1", op, plen, -2); + } + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* memory access by reg+disp */ + + int disp = INTVAL (XEXP (x, 1)); + + if (disp - GET_MODE_SIZE (GET_MODE (src)) >= 63) + { + if (REGNO (XEXP (x, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return avr_asm_len ("adiw r28,%o1-63" CR_TAB + "ldd %0,Y+63" CR_TAB + "sbiw r28,%o1-63", op, plen, -3); + + return avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %0,Y" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -5); + } + else if (REGNO (XEXP (x, 0)) == REG_X) + { + /* This is a paranoid case LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal optimizing options. 
*/ + + avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %0,X", op, plen, -2); + + if (!reg_overlap_mentioned_p (dest, XEXP (x,0)) + && !reg_unused_after (insn, XEXP (x,0))) + { + avr_asm_len ("sbiw r26,%o1", op, plen, 1); + } + + return ""; + } + + return avr_asm_len ("ldd %0,%1", op, plen, -1); + } + + return avr_asm_len ("ld %0,%1", op, plen, -1); +} + +static const char* +out_movhi_r_mr (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + /* "volatile" forces reading low byte first, even if less efficient, + for correct operation with 16-bit I/O registers. */ + int mem_volatile_p = MEM_VOLATILE_P (src); + + if (reg_base > 0) + { + if (reg_dest == reg_base) /* R = (R) */ + return avr_asm_len ("ld __tmp_reg__,%1+" CR_TAB + "ld %B0,%1" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -3); + + if (reg_base != REG_X) + return avr_asm_len ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1", op, plen, -2); + + avr_asm_len ("ld %A0,X+" CR_TAB + "ld %B0,X", op, plen, -2); + + if (!reg_unused_after (insn, base)) + avr_asm_len ("sbiw r26,1", op, plen, 1); + + return ""; + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + int reg_base = true_regnum (XEXP (base, 0)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (src)) + ? avr_asm_len ("adiw r28,%o1-62" CR_TAB + "ldd %A0,Y+62" CR_TAB + "ldd %B0,Y+63" CR_TAB + "sbiw r28,%o1-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -6); + } + + /* This is a paranoid case. LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal + optimization options. */ + + if (reg_base == REG_X) + return reg_base == reg_dest + ? avr_asm_len ("adiw r26,%o1" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld %B0,X" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -4) + + : avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X" CR_TAB + "sbiw r26,%o1+1", op, plen, -4); + + return reg_base == reg_dest + ? avr_asm_len ("ldd __tmp_reg__,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -3) + + : avr_asm_len ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1", op, plen, -2); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + if (!mem_volatile_p) + return avr_asm_len ("ld %B0,%1" CR_TAB + "ld %A0,%1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("sbiw r26,2" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X" CR_TAB + "sbiw r26,1", op, plen, -4) + + : avr_asm_len ("sbiw %r1,2" CR_TAB + "ld %A0,%p1" CR_TAB + "ldd %B0,%p1+1", op, plen, -3); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + return avr_asm_len ("ld %A0,%1" CR_TAB + "ld %B0,%1", op, plen, -2); + } + else if (CONSTANT_ADDRESS_P (base)) + { + return optimize > 0 && io_address_operand (base, HImode) + ? 
avr_asm_len ("in %A0,%i1" CR_TAB + "in %B0,%i1+1", op, plen, -2) + + : avr_asm_len ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1", op, plen, -4); + } + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +static const char* +out_movsi_r_mr (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + int tmp; + + if (!l) + l = &tmp; + + if (reg_base > 0) + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_dest == REG_X) + /* "ld r26,-X" is undefined */ + return *l=7, ("adiw r26,3" CR_TAB + "ld r29,X" CR_TAB + "ld r28,-X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__"); + else if (reg_dest == REG_X - 2) + return *l=5, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld %D0,X" CR_TAB + "mov %C0,__tmp_reg__"); + else if (reg_unused_after (insn, base)) + return *l=4, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X"); + else + return *l=5, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X" CR_TAB + "sbiw r26,3"); + } + else + { + if (reg_dest == reg_base) + return *l=5, ("ldd %D0,%1+3" CR_TAB + "ldd %C0,%1+2" CR_TAB + "ldd __tmp_reg__,%1+1" CR_TAB + "ld %A0,%1" CR_TAB + "mov %B0,__tmp_reg__"); + else if (reg_base == reg_dest + 2) + return *l=5, ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd __tmp_reg__,%1+2" CR_TAB + "ldd %D0,%1+3" CR_TAB + "mov %C0,__tmp_reg__"); + else + return *l=4, ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd %C0,%1+2" CR_TAB + "ldd %D0,%1+3"); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return *l = 6, ("adiw r28,%o1-60" CR_TAB + "ldd %A0,Y+60" CR_TAB + "ldd %B0,Y+61" CR_TAB + "ldd %C0,Y+62" CR_TAB + "ldd %D0,Y+63" CR_TAB + "sbiw r28,%o1-60"); + + return *l = 8, ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "ldd %C0,Y+2" CR_TAB + "ldd %D0,Y+3" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)"); + } + + reg_base = true_regnum (XEXP (base, 0)); + if (reg_base == REG_X) + { + /* R = (X + d) */ + if (reg_dest == REG_X) + { + *l = 7; + /* "ld r26,-X" is undefined */ + return ("adiw r26,%o1+3" CR_TAB + "ld r29,X" CR_TAB + "ld r28,-X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__"); + } + *l = 6; + if (reg_dest == REG_X - 2) + return ("adiw r26,%o1" CR_TAB + "ld r24,X+" CR_TAB + "ld r25,X+" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld r27,X" CR_TAB + "mov r26,__tmp_reg__"); + + return ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X" CR_TAB + "sbiw r26,%o1+3"); + } + if (reg_dest == reg_base) + return *l=5, ("ldd %D0,%D1" CR_TAB + "ldd %C0,%C1" CR_TAB + "ldd __tmp_reg__,%B1" CR_TAB + "ldd %A0,%A1" CR_TAB + "mov %B0,__tmp_reg__"); + else if (reg_dest == reg_base - 2) + return *l=5, ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd __tmp_reg__,%C1" CR_TAB + "ldd %D0,%D1" CR_TAB + "mov %C0,__tmp_reg__"); + return *l=4, ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd %C0,%C1" CR_TAB + "ldd %D0,%D1"); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=4, ("ld %D0,%1" CR_TAB + "ld %C0,%1" CR_TAB + "ld %B0,%1" 
CR_TAB + "ld %A0,%1"); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, ("ld %A0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %C0,%1" CR_TAB + "ld %D0,%1"); + else if (CONSTANT_ADDRESS_P (base)) + return *l=8, ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1" CR_TAB + "lds %C0,%m1+2" CR_TAB + "lds %D0,%m1+3"); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +static const char* +out_movsi_mr_r (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + int tmp; + + if (!l) + l = &tmp; + + if (CONSTANT_ADDRESS_P (base)) + return *l=8,("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1" CR_TAB + "sts %m0+2,%C1" CR_TAB + "sts %m0+3,%D1"); + if (reg_base > 0) /* (r) */ + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_src == REG_X) + { + /* "st X+,r26" is undefined */ + if (reg_unused_after (insn, base)) + return *l=6, ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29"); + else + return *l=7, ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29" CR_TAB + "sbiw r26,3"); + } + else if (reg_base == reg_src + 2) + { + if (reg_unused_after (insn, base)) + return *l=7, ("mov __zero_reg__,%C1" CR_TAB + "mov __tmp_reg__,%D1" CR_TAB + "st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,__zero_reg__" CR_TAB + "st %0,__tmp_reg__" CR_TAB + "clr __zero_reg__"); + else + return *l=8, ("mov __zero_reg__,%C1" CR_TAB + "mov __tmp_reg__,%D1" CR_TAB + "st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,__zero_reg__" CR_TAB + "st %0,__tmp_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,3"); + } + return *l=5, ("st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,%C1" CR_TAB + "st %0,%D1" CR_TAB + "sbiw r26,3"); + } + else + return *l=4, ("st %0,%A1" CR_TAB + "std %0+1,%B1" CR_TAB + "std %0+2,%C1" CR_TAB + "std %0+3,%D1"); + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return *l = 6, ("adiw r28,%o0-60" CR_TAB + "std Y+60,%A1" CR_TAB + "std Y+61,%B1" CR_TAB + "std Y+62,%C1" CR_TAB + "std Y+63,%D1" CR_TAB + "sbiw r28,%o0-60"); + + return *l = 8, ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "std Y+2,%C1" CR_TAB + "std Y+3,%D1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)"); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + if (reg_src == REG_X) + { + *l = 9; + return ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,__zero_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+3"); + } + else if (reg_src == REG_X - 2) + { + *l = 9; + return ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,r24" CR_TAB + "st X+,r25" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X,__zero_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+3"); + } + *l = 6; + return ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X+,%B1" CR_TAB + "st X+,%C1" CR_TAB + "st X,%D1" CR_TAB + "sbiw r26,%o0+3"); + } + return *l=4, ("std %A0,%A1" CR_TAB + "std %B0,%B1" CR_TAB + "std 
%C0,%C1" CR_TAB + "std %D0,%D1"); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=4, ("st %0,%D1" CR_TAB + "st %0,%C1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%A1"); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, ("st %0,%A1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%C1" CR_TAB + "st %0,%D1"); + fatal_insn ("unknown move insn:",insn); + return ""; +} + +const char * +output_movsisf (rtx insn, rtx operands[], int *l) +{ + int dummy; + rtx dest = operands[0]; + rtx src = operands[1]; + int *real_l = l; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, operands, real_l); + } + + if (!l) + l = &dummy; + + gcc_assert (4 == GET_MODE_SIZE (GET_MODE (dest))); + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (true_regnum (dest) > true_regnum (src)) + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return ("movw %C0,%C1" CR_TAB + "movw %A0,%A1"); + } + *l = 4; + return ("mov %D0,%D1" CR_TAB + "mov %C0,%C1" CR_TAB + "mov %B0,%B1" CR_TAB + "mov %A0,%A1"); + } + else + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return ("movw %A0,%A1" CR_TAB + "movw %C0,%C1"); + } + *l = 4; + return ("mov %A0,%A1" CR_TAB + "mov %B0,%B1" CR_TAB + "mov %C0,%C1" CR_TAB + "mov %D0,%D1"); + } + } + else if (CONSTANT_P (src)) + { + return output_reload_insisf (operands, NULL_RTX, real_l); + } + else if (MEM_P (src)) + return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */ + } + else if (MEM_P (dest)) + { + const char *templ; + + if (src == CONST0_RTX (GET_MODE (dest))) + operands[1] = zero_reg_rtx; + + templ = out_movsi_mr_r (insn, operands, real_l); + + if (!real_l) + output_asm_insn (templ, operands); + + operands[1] = src; + return ""; + } + fatal_insn ("invalid insn:", insn); + return ""; +} + + +/* Handle loads of 24-bit types from memory to register. 
*/ + +static const char* +avr_out_load_psi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + + if (reg_base > 0) + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_dest == REG_X) + /* "ld r26,-X" is undefined */ + return avr_asm_len ("adiw r26,2" CR_TAB + "ld r28,X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__", op, plen, -6); + else + { + avr_asm_len ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X", op, plen, -3); + + if (reg_dest != REG_X - 2 + && !reg_unused_after (insn, base)) + { + avr_asm_len ("sbiw r26,2", op, plen, 1); + } + + return ""; + } + } + else /* reg_base != REG_X */ + { + if (reg_dest == reg_base) + return avr_asm_len ("ldd %C0,%1+2" CR_TAB + "ldd __tmp_reg__,%1+1" CR_TAB + "ld %A0,%1" CR_TAB + "mov %B0,__tmp_reg__", op, plen, -4); + else + return avr_asm_len ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd %C0,%1+2", op, plen, -3); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return avr_asm_len ("adiw r28,%o1-61" CR_TAB + "ldd %A0,Y+61" CR_TAB + "ldd %B0,Y+62" CR_TAB + "ldd %C0,Y+63" CR_TAB + "sbiw r28,%o1-61", op, plen, -5); + + return avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "ldd %C0,Y+2" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -7); + } + + reg_base = true_regnum (XEXP (base, 0)); + if (reg_base == REG_X) + { + /* R = (X + d) */ + if (reg_dest == REG_X) + { + /* "ld r26,-X" is undefined */ + return avr_asm_len ("adiw r26,%o1+2" CR_TAB + "ld r28,X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__", op, plen, -6); + } + + avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X", op, plen, -4); + + if (reg_dest != REG_W + && !reg_unused_after (insn, XEXP (base, 0))) + avr_asm_len ("sbiw r26,%o1+2", op, plen, 1); + + return ""; + } + + if (reg_dest == reg_base) + return avr_asm_len ("ldd %C0,%C1" CR_TAB + "ldd __tmp_reg__,%B1" CR_TAB + "ldd %A0,%A1" CR_TAB + "mov %B0,__tmp_reg__", op, plen, -4); + + return avr_asm_len ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd %C0,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return avr_asm_len ("ld %C0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %A0,%1", op, plen, -3); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return avr_asm_len ("ld %A0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %C0,%1", op, plen, -3); + + else if (CONSTANT_ADDRESS_P (base)) + return avr_asm_len ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1" CR_TAB + "lds %C0,%m1+2", op, plen , -6); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +/* Handle store of 24-bit type from register or zero to memory. 
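+
+   Note that a constant-zero source never reaches this point as a
+   constant: avr_out_movpsi below replaces such a source with
+   __zero_reg__ first, so only register sources need to be handled here.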
*/ + +static const char* +avr_out_store_psi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + + if (CONSTANT_ADDRESS_P (base)) + return avr_asm_len ("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1" CR_TAB + "sts %m0+2,%C1", op, plen, -6); + + if (reg_base > 0) /* (r) */ + { + if (reg_base == REG_X) /* (R26) */ + { + gcc_assert (!reg_overlap_mentioned_p (base, src)); + + avr_asm_len ("st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0,%C1", op, plen, -3); + + if (!reg_unused_after (insn, base)) + avr_asm_len ("sbiw r26,2", op, plen, 1); + + return ""; + } + else + return avr_asm_len ("st %0,%A1" CR_TAB + "std %0+1,%B1" CR_TAB + "std %0+2,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return avr_asm_len ("adiw r28,%o0-61" CR_TAB + "std Y+61,%A1" CR_TAB + "std Y+62,%B1" CR_TAB + "std Y+63,%C1" CR_TAB + "sbiw r28,%o0-60", op, plen, -5); + + return avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "std Y+2,%C1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -7); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + gcc_assert (!reg_overlap_mentioned_p (XEXP (base, 0), src)); + + avr_asm_len ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X+,%B1" CR_TAB + "st X,%C1", op, plen, -4); + + if (!reg_unused_after (insn, XEXP (base, 0))) + avr_asm_len ("sbiw r26,%o0+2", op, plen, 1); + + return ""; + } + + return avr_asm_len ("std %A0,%A1" CR_TAB + "std %B0,%B1" CR_TAB + "std %C0,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return avr_asm_len ("st %0,%C1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -3); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%C1", op, plen, -3); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + + +/* Move around 24-bit stuff. */ + +const char * +avr_out_movpsi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, op, plen); + } + + if (register_operand (dest, VOIDmode)) + { + if (register_operand (src, VOIDmode)) /* mov r,r */ + { + if (true_regnum (dest) > true_regnum (src)) + { + avr_asm_len ("mov %C0,%C1", op, plen, -1); + + if (AVR_HAVE_MOVW) + return avr_asm_len ("movw %A0,%A1", op, plen, 1); + else + return avr_asm_len ("mov %B0,%B1" CR_TAB + "mov %A0,%A1", op, plen, 2); + } + else + { + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %A0,%A1", op, plen, -1); + else + avr_asm_len ("mov %A0,%A1" CR_TAB + "mov %B0,%B1", op, plen, -2); + + return avr_asm_len ("mov %C0,%C1", op, plen, 1); + } + } + else if (CONSTANT_P (src)) + { + return avr_out_reload_inpsi (op, NULL_RTX, plen); + } + else if (MEM_P (src)) + return avr_out_load_psi (insn, op, plen); /* mov r,m */ + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? 
zero_reg_rtx : src; + + return avr_out_store_psi (insn, xop, plen); + } + + fatal_insn ("invalid insn:", insn); + return ""; +} + + +static const char* +out_movqi_mr_r (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (dest, 0); + + if (CONSTANT_ADDRESS_P (x)) + { + return optimize > 0 && io_address_operand (x, QImode) + ? avr_asm_len ("out %i0,%1", op, plen, -1) + : avr_asm_len ("sts %m0,%1", op, plen, -2); + } + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* memory access by reg+disp */ + + int disp = INTVAL (XEXP (x, 1)); + + if (disp - GET_MODE_SIZE (GET_MODE (dest)) >= 63) + { + if (REGNO (XEXP (x, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return avr_asm_len ("adiw r28,%o0-63" CR_TAB + "std Y+63,%1" CR_TAB + "sbiw r28,%o0-63", op, plen, -3); + + return avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -5); + } + else if (REGNO (XEXP (x,0)) == REG_X) + { + if (reg_overlap_mentioned_p (src, XEXP (x, 0))) + { + avr_asm_len ("mov __tmp_reg__,%1" CR_TAB + "adiw r26,%o0" CR_TAB + "st X,__tmp_reg__", op, plen, -3); + } + else + { + avr_asm_len ("adiw r26,%o0" CR_TAB + "st X,%1", op, plen, -2); + } + + if (!reg_unused_after (insn, XEXP (x,0))) + avr_asm_len ("sbiw r26,%o0", op, plen, 1); + + return ""; + } + + return avr_asm_len ("std %0,%1", op, plen, -1); + } + + return avr_asm_len ("st %0,%1", op, plen, -1); +} + + +/* Helper for the next function for XMEGA. It does the same + but with low byte first. */ + +static const char* +avr_out_movhi_mr_r_xmega (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + + /* "volatile" forces writing low byte first, even if less efficient, + for correct operation with 16-bit I/O registers like SP. */ + int mem_volatile_p = MEM_VOLATILE_P (dest); + + if (CONSTANT_ADDRESS_P (base)) + return optimize > 0 && io_address_operand (base, HImode) + ? avr_asm_len ("out %i0,%A1" CR_TAB + "out %i0+1,%B1", op, plen, -2) + + : avr_asm_len ("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1", op, plen, -4); + + if (reg_base > 0) + { + if (reg_base != REG_X) + return avr_asm_len ("st %0,%A1" CR_TAB + "std %0+1,%B1", op, plen, -2); + + if (reg_src == REG_X) + /* "st X+,r26" and "st -X,r26" are undefined. */ + avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__", op, plen, -4); + else + avr_asm_len ("st X+,%A1" CR_TAB + "st X,%B1", op, plen, -2); + + return reg_unused_after (insn, base) + ? "" + : avr_asm_len ("sbiw r26,1", op, plen, 1); + } + else if (GET_CODE (base) == PLUS) + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)) + ? 
avr_asm_len ("adiw r28,%o0-62" CR_TAB + "std Y+62,%A1" CR_TAB + "std Y+63,%B1" CR_TAB + "sbiw r28,%o0-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -6); + } + + if (reg_base != REG_X) + return avr_asm_len ("std %A0,%A1" CR_TAB + "std %B0,%B1", op, plen, -2); + /* (X + d) = R */ + return reg_src == REG_X + ? avr_asm_len ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X,__zero_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+1", op, plen, -7) + + : avr_asm_len ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X,%B1" CR_TAB + "sbiw r26,%o0+1", op, plen, -4); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + if (!mem_volatile_p) + return avr_asm_len ("st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("sbiw r26,2" CR_TAB + "st X+,%A1" CR_TAB + "st X,%B1" CR_TAB + "sbiw r26,1", op, plen, -4) + + : avr_asm_len ("sbiw %r0,2" CR_TAB + "st %p0,%A1" CR_TAB + "std %p0+1,%B1", op, plen, -3); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1", op, plen, -2); + + } + fatal_insn ("unknown move insn:",insn); + return ""; +} + + +static const char* +out_movhi_mr_r (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + int mem_volatile_p; + + /* "volatile" forces writing high-byte first (no-xmega) resp. + low-byte first (xmega) even if less efficient, for correct + operation with 16-bit I/O registers like. */ + + if (AVR_XMEGA) + return avr_out_movhi_mr_r_xmega (insn, op, plen); + + mem_volatile_p = MEM_VOLATILE_P (dest); + + if (CONSTANT_ADDRESS_P (base)) + return optimize > 0 && io_address_operand (base, HImode) + ? avr_asm_len ("out %i0+1,%B1" CR_TAB + "out %i0,%A1", op, plen, -2) + + : avr_asm_len ("sts %m0+1,%B1" CR_TAB + "sts %m0,%A1", op, plen, -4); + + if (reg_base > 0) + { + if (reg_base != REG_X) + return avr_asm_len ("std %0+1,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + + if (reg_src == REG_X) + /* "st X+,r26" and "st -X,r26" are undefined. */ + return !mem_volatile_p && reg_unused_after (insn, src) + ? avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__", op, plen, -4) + + : avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__" CR_TAB + "sbiw r26,1" CR_TAB + "st X,r26", op, plen, -5); + + return !mem_volatile_p && reg_unused_after (insn, base) + ? avr_asm_len ("st X+,%A1" CR_TAB + "st X,%B1", op, plen, -2) + : avr_asm_len ("adiw r26,1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1", op, plen, -3); + } + else if (GET_CODE (base) == PLUS) + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)) + ? 
avr_asm_len ("adiw r28,%o0-62" CR_TAB + "std Y+63,%B1" CR_TAB + "std Y+62,%A1" CR_TAB + "sbiw r28,%o0-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "std Y+1,%B1" CR_TAB + "st Y,%A1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -6); + } + + if (reg_base != REG_X) + return avr_asm_len ("std %B0,%B1" CR_TAB + "std %A0,%A1", op, plen, -2); + /* (X + d) = R */ + return reg_src == REG_X + ? avr_asm_len ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0+1" CR_TAB + "st X,__zero_reg__" CR_TAB + "st -X,__tmp_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0", op, plen, -7) + + : avr_asm_len ("adiw r26,%o0+1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1" CR_TAB + "sbiw r26,%o0", op, plen, -4); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + return avr_asm_len ("st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (!mem_volatile_p) + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("adiw r26,1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1" CR_TAB + "adiw r26,2", op, plen, -4) + + : avr_asm_len ("std %p0+1,%B1" CR_TAB + "st %p0,%A1" CR_TAB + "adiw %r0,2", op, plen, -3); + } + fatal_insn ("unknown move insn:",insn); + return ""; +} + +/* Return 1 if frame pointer for current function required. */ + +static bool +avr_frame_pointer_required_p (void) +{ + return (cfun->calls_alloca + || cfun->calls_setjmp + || cfun->has_nonlocal_label + || crtl->args.info.nregs == 0 + || get_frame_size () > 0); +} + +/* Returns the condition of compare insn INSN, or UNKNOWN. */ + +static RTX_CODE +compare_condition (rtx insn) +{ + rtx next = next_real_insn (insn); + + if (next && JUMP_P (next)) + { + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + + if (IF_THEN_ELSE == GET_CODE (src)) + return GET_CODE (XEXP (src, 0)); + } + + return UNKNOWN; +} + + +/* Returns true iff INSN is a tst insn that only tests the sign. */ + +static bool +compare_sign_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GE || cond == LT); +} + + +/* Returns true iff the next insn is a JUMP_INSN with a condition + that needs to be swapped (GT, GTU, LE, LEU). */ + +static bool +compare_diff_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GT || cond == GTU || cond == LE || cond == LEU) ? cond : 0; +} + +/* Returns true iff INSN is a compare insn with the EQ or NE condition. */ + +static bool +compare_eq_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == EQ || cond == NE); +} + + +/* Output compare instruction + + compare (XOP[0], XOP[1]) + + for a register XOP[0] and a compile-time constant XOP[1]. Return "". + XOP[2] is an 8-bit scratch register as needed. + + PLEN == NULL: Output instructions. + PLEN != NULL: Set *PLEN to the length (in words) of the sequence. + Don't output anything. */ + +const char* +avr_out_compare (rtx insn, rtx *xop, int *plen) +{ + /* Register to compare and value to compare against. */ + rtx xreg = xop[0]; + rtx xval = xop[1]; + + /* MODE of the comparison. */ + enum machine_mode mode; + + /* Number of bytes to operate on. */ + int i, n_bytes = GET_MODE_SIZE (GET_MODE (xreg)); + + /* Value (0..0xff) held in clobber register xop[2] or -1 if unknown. */ + int clobber_val = -1; + + /* Map fixed mode operands to integer operands with the same binary + representation. 
They are easier to handle in the remainder. */ + + if (CONST_FIXED_P (xval)) + { + xreg = avr_to_int_mode (xop[0]); + xval = avr_to_int_mode (xop[1]); + } + + mode = GET_MODE (xreg); + + gcc_assert (REG_P (xreg)); + gcc_assert ((CONST_INT_P (xval) && n_bytes <= 4) + || (const_double_operand (xval, VOIDmode) && n_bytes == 8)); + + if (plen) + *plen = 0; + + /* Comparisons == +/-1 and != +/-1 can be done similar to camparing + against 0 by ORing the bytes. This is one instruction shorter. + Notice that 64-bit comparisons are always against reg:ALL8 18 (ACC_A) + and therefore don't use this. */ + + if (!test_hard_reg_class (LD_REGS, xreg) + && compare_eq_p (insn) + && reg_unused_after (insn, xreg)) + { + if (xval == const1_rtx) + { + avr_asm_len ("dec %A0" CR_TAB + "or %A0,%B0", xop, plen, 2); + + if (n_bytes >= 3) + avr_asm_len ("or %A0,%C0", xop, plen, 1); + + if (n_bytes >= 4) + avr_asm_len ("or %A0,%D0", xop, plen, 1); + + return ""; + } + else if (xval == constm1_rtx) + { + if (n_bytes >= 4) + avr_asm_len ("and %A0,%D0", xop, plen, 1); + + if (n_bytes >= 3) + avr_asm_len ("and %A0,%C0", xop, plen, 1); + + return avr_asm_len ("and %A0,%B0" CR_TAB + "com %A0", xop, plen, 2); + } + } + + for (i = 0; i < n_bytes; i++) + { + /* We compare byte-wise. */ + rtx reg8 = simplify_gen_subreg (QImode, xreg, mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + + /* 8-bit value to compare with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Registers R16..R31 can operate with immediate. */ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + xop[0] = reg8; + xop[1] = gen_int_mode (val8, QImode); + + /* Word registers >= R24 can use SBIW/ADIW with 0..63. */ + + if (i == 0 + && test_hard_reg_class (ADDW_REGS, reg8)) + { + int val16 = trunc_int_for_mode (INTVAL (xval), HImode); + + if (IN_RANGE (val16, 0, 63) + && (val8 == 0 + || reg_unused_after (insn, xreg))) + { + avr_asm_len ("sbiw %0,%1", xop, plen, 1); + i++; + continue; + } + + if (n_bytes == 2 + && IN_RANGE (val16, -63, -1) + && compare_eq_p (insn) + && reg_unused_after (insn, xreg)) + { + return avr_asm_len ("adiw %0,%n1", xop, plen, 1); + } + } + + /* Comparing against 0 is easy. */ + + if (val8 == 0) + { + avr_asm_len (i == 0 + ? "cp %0,__zero_reg__" + : "cpc %0,__zero_reg__", xop, plen, 1); + continue; + } + + /* Upper registers can compare and subtract-with-carry immediates. + Notice that compare instructions do the same as respective subtract + instruction; the only difference is that comparisons don't write + the result back to the target register. */ + + if (ld_reg_p) + { + if (i == 0) + { + avr_asm_len ("cpi %0,%1", xop, plen, 1); + continue; + } + else if (reg_unused_after (insn, xreg)) + { + avr_asm_len ("sbci %0,%1", xop, plen, 1); + continue; + } + } + + /* Must load the value into the scratch register. */ + + gcc_assert (REG_P (xop[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", xop, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (i == 0 + ? "cp %0,%2" + : "cpc %0,%2", xop, plen, 1); + } + + return ""; +} + + +/* Prepare operands of compare_const_di2 to be used with avr_out_compare. */ + +const char* +avr_out_compare64 (rtx insn, rtx *op, int *plen) +{ + rtx xop[3]; + + xop[0] = gen_rtx_REG (DImode, 18); + xop[1] = op[0]; + xop[2] = op[1]; + + return avr_out_compare (insn, xop, plen); +} + +/* Output test instruction for HImode. 
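+ Use TST of the high byte for pure sign tests, an OR of both bytes for EQ/NE when the register dies here, and a full compare otherwise.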
*/ + +const char* +avr_out_tsthi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %B0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0", op, plen, -1); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Output test instruction for PSImode. */ + +const char* +avr_out_tstpsi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %C0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0" CR_TAB + "or %A0,%C0", op, plen, -2); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Output test instruction for SImode. */ + +const char* +avr_out_tstsi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %D0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0" CR_TAB + "or %A0,%C0" CR_TAB + "or %A0,%D0", op, plen, -3); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Generate asm equivalent for various shifts. This only handles cases + that are not already carefully hand-optimized in ?sh??i3_out. + + OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted. + OPERANDS[2] is the shift count as CONST_INT, MEM or REG. + OPERANDS[3] is a QImode scratch register from LD regs if + available and SCRATCH, otherwise (no scratch available) + + TEMPL is an assembler template that shifts by one position. + T_LEN is the length of this template. */ + +void +out_shift_with_cnt (const char *templ, rtx insn, rtx operands[], + int *plen, int t_len) +{ + bool second_label = true; + bool saved_in_tmp = false; + bool use_zero_reg = false; + rtx op[5]; + + op[0] = operands[0]; + op[1] = operands[1]; + op[2] = operands[2]; + op[3] = operands[3]; + + if (plen) + *plen = 0; + + if (CONST_INT_P (operands[2])) + { + bool scratch = (GET_CODE (PATTERN (insn)) == PARALLEL + && REG_P (operands[3])); + int count = INTVAL (operands[2]); + int max_len = 10; /* If larger than this, always use a loop. */ + + if (count <= 0) + return; + + if (count < 8 && !scratch) + use_zero_reg = true; + + if (optimize_size) + max_len = t_len + (scratch ? 3 : (use_zero_reg ? 4 : 5)); + + if (t_len * count <= max_len) + { + /* Output shifts inline with no loop - faster. */ + + while (count-- > 0) + avr_asm_len (templ, op, plen, t_len); + + return; + } + + if (scratch) + { + avr_asm_len ("ldi %3,%2", op, plen, 1); + } + else if (use_zero_reg) + { + /* Hack to save one word: use __zero_reg__ as loop counter. + Set one bit, then shift in a loop until it is 0 again. */ + + op[3] = zero_reg_rtx; + + avr_asm_len ("set" CR_TAB + "bld %3,%2-1", op, plen, 2); + } + else + { + /* No scratch register available, use one from LD_REGS (saved in + __tmp_reg__) that doesn't overlap with registers to shift. 
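+ The borrowed register is restored from __tmp_reg__ once the shift loop has finished.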
*/ + + op[3] = all_regs_rtx[((REGNO (op[0]) - 1) & 15) + 16]; + op[4] = tmp_reg_rtx; + saved_in_tmp = true; + + avr_asm_len ("mov %4,%3" CR_TAB + "ldi %3,%2", op, plen, 2); + } + + second_label = false; + } + else if (MEM_P (op[2])) + { + rtx op_mov[2]; + + op_mov[0] = op[3] = tmp_reg_rtx; + op_mov[1] = op[2]; + + out_movqi_r_mr (insn, op_mov, plen); + } + else if (register_operand (op[2], QImode)) + { + op[3] = op[2]; + + if (!reg_unused_after (insn, op[2]) + || reg_overlap_mentioned_p (op[0], op[2])) + { + op[3] = tmp_reg_rtx; + avr_asm_len ("mov %3,%2", op, plen, 1); + } + } + else + fatal_insn ("bad shift insn:", insn); + + if (second_label) + avr_asm_len ("rjmp 2f", op, plen, 1); + + avr_asm_len ("1:", op, plen, 0); + avr_asm_len (templ, op, plen, t_len); + + if (second_label) + avr_asm_len ("2:", op, plen, 0); + + avr_asm_len (use_zero_reg ? "lsr %3" : "dec %3", op, plen, 1); + avr_asm_len (second_label ? "brpl 1b" : "brne 1b", op, plen, 1); + + if (saved_in_tmp) + avr_asm_len ("mov %3,%4", op, plen, 1); +} + + +/* 8bit shift left ((char)x << i) */ + +const char * +ashlqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return "clr %0"; + + case 1: + *len = 1; + return "lsl %0"; + + case 2: + *len = 2; + return ("lsl %0" CR_TAB + "lsl %0"); + + case 3: + *len = 3; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 2; + return ("swap %0" CR_TAB + "andi %0,0xf0"); + } + *len = 4; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + return ("swap %0" CR_TAB + "lsl %0" CR_TAB + "andi %0,0xe0"); + } + *len = 5; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return ("swap %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "andi %0,0xc0"); + } + *len = 6; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 7: + *len = 3; + return ("ror %0" CR_TAB + "clr %0" CR_TAB + "ror %0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt ("lsl %0", + insn, operands, len, 1); + return ""; +} + + +/* 16bit shift left ((short)x << i) */ + +const char * +ashlhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return ("clr %B0" CR_TAB + "clr %A0"); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return ("swap %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0" CR_TAB + "eor %B0,%A0" CR_TAB + "andi %A0,0xf0" CR_TAB + "eor %B0,%A0"); + } + if (scratch) + { + *len = 7; + return ("swap %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3" CR_TAB + "eor %B0,%A0" CR_TAB + "and %A0,%3" CR_TAB + "eor %B0,%A0"); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 
5 : 6 */ + if (ldi_ok) + { + *len = 8; + return ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "swap %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0" CR_TAB + "eor %B0,%A0" CR_TAB + "andi %A0,0xf0" CR_TAB + "eor %B0,%A0"); + } + if (scratch) + { + *len = 9; + return ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "swap %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3" CR_TAB + "eor %B0,%A0" CR_TAB + "and %A0,%3" CR_TAB + "eor %B0,%A0"); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 9; + return ("clr __tmp_reg__" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "ror __tmp_reg__" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "ror __tmp_reg__" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %A0,__tmp_reg__"); + + case 7: + *len = 5; + return ("lsr %B0" CR_TAB + "mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "ror %B0" CR_TAB + "ror %A0"); + + case 8: + return *len = 2, ("mov %B0,%A1" CR_TAB + "clr %A0"); + + case 9: + *len = 3; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0"); + + case 10: + *len = 4; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 11: + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 12: + if (ldi_ok) + { + *len = 4; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0"); + } + if (scratch) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3"); + } + *len = 6; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 13: + if (ldi_ok) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "lsl %B0" CR_TAB + "andi %B0,0xe0"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x20" CR_TAB + "mul %A0,%3" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "lsl %B0" CR_TAB + "ldi %3,0xe0" CR_TAB + "and %B0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + "bld r1,5" CR_TAB + "mul %A0,r1" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + *len = 7; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %B0,0x40" CR_TAB + "mul %A0,%B0" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x40" CR_TAB + "mul %A0,%3" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "ldi %A0,6" "\n1:\t" + "lsl %B0" CR_TAB + "dec %A0" CR_TAB + "brne 1b"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("clr %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "clr %A0"); + + case 15: + *len = 4; + return ("clr %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "clr %A0"); + } + len = t; + } + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit shift left */ + +const char* +avr_out_ashlpsi3 (rtx insn, rtx *op, int *plen) +{ + if (plen) 
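+ /* Length computation requested: start the running word count at zero.  */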
+ *plen = 0; + + if (CONST_INT_P (op[2])) + { + switch (INTVAL (op[2])) + { + default: + if (INTVAL (op[2]) < 24) + break; + + return avr_asm_len ("clr %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0", op, plen, 3); + + case 8: + { + int reg0 = REGNO (op[0]); + int reg1 = REGNO (op[1]); + + if (reg0 >= reg1) + return avr_asm_len ("mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0", op, plen, 3); + else + return avr_asm_len ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1", op, plen, 3); + } + + case 16: + { + int reg0 = REGNO (op[0]); + int reg1 = REGNO (op[1]); + + if (reg0 + 2 != reg1) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "clr %A0", op, plen, 2); + } + + case 23: + return avr_asm_len ("clr %C0" CR_TAB + "lsr %A0" CR_TAB + "ror %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 5); + } + } + + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol %C0", insn, op, plen, 3); + return ""; +} + + +/* 32bit shift left ((long)x << i) */ + +const char * +ashlsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "movw %A0,%C0"); + *len = 4; + return ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 >= reg1) + return ("mov %D0,%C1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0"); + else + return ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %D0,%C1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + if (reg0 + 2 == reg1) + return *len = 2, ("clr %B0" CR_TAB + "clr %A0"); + if (AVR_HAVE_MOVW) + return *len = 3, ("movw %C0,%A1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + else + return *len = 4, ("mov %C0,%A1" CR_TAB + "mov %D0,%B1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + } + + case 24: + *len = 4; + return ("mov %D0,%A1" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 31: + *len = 6; + return ("clr %D0" CR_TAB + "lsr %A0" CR_TAB + "ror %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + } + len = t; + } + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol %C0" CR_TAB + "rol %D0", insn, operands, len, 4); + return ""; +} + +/* 8bit arithmetic shift right ((signed char)x >> i) */ + +const char * +ashrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 1: + *len = 1; + return "asr %0"; + + case 2: + *len = 2; + return ("asr %0" CR_TAB + "asr %0"); + + case 3: + *len = 3; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 4: + *len = 4; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 5: + *len = 5; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 6: + *len = 4; + return ("bst %0,6" CR_TAB + "lsl %0" CR_TAB + "sbc %0,%0" CR_TAB + "bld %0,0"); + + default: + if (INTVAL (operands[2]) < 8) + break; + + /* fall through */ + + case 7: + *len = 2; + return ("lsl %0" CR_TAB + "sbc %0,%0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. 
Incorrect shift:", insn); + + out_shift_with_cnt ("asr %0", + insn, operands, len, 1); + return ""; +} + + +/* 16bit arithmetic shift right ((signed short)x >> i) */ + +const char * +ashrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 4: + case 5: + /* XXX try to optimize this too? */ + break; + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 8; + return ("mov __tmp_reg__,%A0" CR_TAB + "mov %A0,%B0" CR_TAB + "lsl __tmp_reg__" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0" CR_TAB + "lsl __tmp_reg__" CR_TAB + "rol %A0" CR_TAB + "rol %B0"); + + case 7: + *len = 4; + return ("lsl %A0" CR_TAB + "mov %A0,%B0" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1) + return *len = 3, ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0"); + else + return *len = 4, ("mov %A0,%B1" CR_TAB + "clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "dec %B0"); + } + + case 9: + *len = 4; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0"); + + case 10: + *len = 5; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 11: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x20" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 12: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x10" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 7; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 13: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x08" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size) + break; /* scratch ? 
5 : 7 */ + *len = 8; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 14: + *len = 5; + return ("lsl %B0" CR_TAB + "sbc %A0,%A0" CR_TAB + "lsl %B0" CR_TAB + "mov %B0,%A0" CR_TAB + "rol %A0"); + + default: + if (INTVAL (operands[2]) < 16) + break; + + /* fall through */ + + case 15: + return *len = 3, ("lsl %B0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0"); + } + len = t; + } + out_shift_with_cnt ("asr %B0" CR_TAB + "ror %A0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit arithmetic shift right */ + +const char* +avr_out_ashrpsi3 (rtx insn, rtx *op, int *plen) +{ + int dest = REGNO (op[0]); + int src = REGNO (op[1]); + + if (CONST_INT_P (op[2])) + { + if (plen) + *plen = 0; + + switch (INTVAL (op[2])) + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0" CR_TAB + "sbrc %B0,7" CR_TAB + "dec %C0", op, plen, 5); + else + return avr_asm_len ("clr %C0" CR_TAB + "sbrc %C1,7" CR_TAB + "dec %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 5); + + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %B0" CR_TAB + "mov %C0,%B0", op, plen, 4); + + default: + if (INTVAL (op[2]) < 24) + break; + + /* fall through */ + + case 23: + return avr_asm_len ("lsl %C0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0", op, plen, 4); + } /* switch */ + } + + out_shift_with_cnt ("asr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, op, plen, 3); + return ""; +} + + +/* 32-bit arithmetic shift right ((signed long)x >> i) */ + +const char * +ashrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len=6; + if (reg0 <= reg1) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %C0,7" CR_TAB + "dec %D0"); + else + return ("clr %D0" CR_TAB + "sbrc %D1,7" CR_TAB + "dec %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 4, ("clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + if (AVR_HAVE_MOVW) + return *len = 5, ("movw %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + else + return *len = 6, ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + } + + case 24: + return *len = 6, ("mov %A0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %D0" CR_TAB + "mov %B0,%D0" CR_TAB + "mov %C0,%D0"); + + default: + if (INTVAL (operands[2]) < 32) + break; + + /* fall through */ + + case 31: + if (AVR_HAVE_MOVW) + return *len = 4, ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "movw %C0,%A0"); + else + return *len = 5, ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0" CR_TAB + "mov %D0,%A0"); + } + len = t; + } + out_shift_with_cnt ("asr %D0" CR_TAB + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); + return ""; +} + +/* 8-bit logic shift right ((unsigned char)x 
>> i) */ + +const char * +lshrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return "clr %0"; + + case 1: + *len = 1; + return "lsr %0"; + + case 2: + *len = 2; + return ("lsr %0" CR_TAB + "lsr %0"); + case 3: + *len = 3; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len=2; + return ("swap %0" CR_TAB + "andi %0,0x0f"); + } + *len = 4; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + return ("swap %0" CR_TAB + "lsr %0" CR_TAB + "andi %0,0x7"); + } + *len = 5; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return ("swap %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "andi %0,0x3"); + } + *len = 6; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 7: + *len = 3; + return ("rol %0" CR_TAB + "clr %0" CR_TAB + "rol %0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt ("lsr %0", + insn, operands, len, 1); + return ""; +} + +/* 16-bit logic shift right ((unsigned short)x >> i) */ + +const char * +lshrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return ("clr %B0" CR_TAB + "clr %A0"); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return ("swap %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f" CR_TAB + "eor %A0,%B0" CR_TAB + "andi %B0,0x0f" CR_TAB + "eor %A0,%B0"); + } + if (scratch) + { + *len = 7; + return ("swap %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3" CR_TAB + "eor %A0,%B0" CR_TAB + "and %B0,%3" CR_TAB + "eor %A0,%B0"); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + if (ldi_ok) + { + *len = 8; + return ("lsr %B0" CR_TAB + "ror %A0" CR_TAB + "swap %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f" CR_TAB + "eor %A0,%B0" CR_TAB + "andi %B0,0x0f" CR_TAB + "eor %A0,%B0"); + } + if (scratch) + { + *len = 9; + return ("lsr %B0" CR_TAB + "ror %A0" CR_TAB + "swap %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3" CR_TAB + "eor %A0,%B0" CR_TAB + "and %B0,%3" CR_TAB + "eor %A0,%B0"); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 
5 : 6 */ + *len = 9; + return ("clr __tmp_reg__" CR_TAB + "lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol __tmp_reg__" CR_TAB + "lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol __tmp_reg__" CR_TAB + "mov %A0,%B0" CR_TAB + "mov %B0,__tmp_reg__"); + + case 7: + *len = 5; + return ("lsl %A0" CR_TAB + "mov %A0,%B0" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0" CR_TAB + "neg %B0"); + + case 8: + return *len = 2, ("mov %A0,%B1" CR_TAB + "clr %B0"); + + case 9: + *len = 3; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0"); + + case 10: + *len = 4; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 11: + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 12: + if (ldi_ok) + { + *len = 4; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f"); + } + if (scratch) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3"); + } + *len = 6; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 13: + if (ldi_ok) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "lsr %A0" CR_TAB + "andi %A0,0x07"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x08" CR_TAB + "mul %B0,%3" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "lsr %A0" CR_TAB + "ldi %3,0x07" CR_TAB + "and %A0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + "bld r1,3" CR_TAB + "mul %B0,r1" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + *len = 7; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x04" CR_TAB + "mul %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x04" CR_TAB + "mul %B0,%3" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "ldi %B0,6" "\n1:\t" + "lsr %A0" CR_TAB + "dec %B0" CR_TAB + "brne 1b"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("clr %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "clr %B0"); + + case 15: + *len = 4; + return ("clr %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "clr %B0"); + } + len = t; + } + out_shift_with_cnt ("lsr %B0" CR_TAB + "ror %A0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit logic shift right */ + +const char* +avr_out_lshrpsi3 (rtx insn, rtx *op, int *plen) +{ + int dest = REGNO (op[0]); + int src = REGNO (op[1]); + + if (CONST_INT_P (op[2])) + { + if (plen) + *plen = 0; + + switch (INTVAL (op[2])) + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0", op, plen, 3); + else + return avr_asm_len ("clr %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 3); + + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0", op, plen, 2); + + 
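+ /* Counts of 23 and above share the sequence below; remaining counts drop to the generic shift loop.  */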
default: + if (INTVAL (op[2]) < 24) + break; + + /* fall through */ + + case 23: + return avr_asm_len ("clr %A0" CR_TAB + "sbrc %C0,7" CR_TAB + "inc %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0", op, plen, 5); + } /* switch */ + } + + out_shift_with_cnt ("lsr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, op, plen, 3); + return ""; +} + + +/* 32-bit logic shift right ((unsigned int)x >> i) */ + +const char * +lshrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "movw %A0,%C0"); + *len = 4; + return ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 <= reg1) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0"); + else + return ("clr %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 2, ("clr %C0" CR_TAB + "clr %D0"); + if (AVR_HAVE_MOVW) + return *len = 3, ("movw %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + else + return *len = 4, ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + } + + case 24: + return *len = 4, ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + + case 31: + *len = 6; + return ("clr %A0" CR_TAB + "sbrc %D0,7" CR_TAB + "inc %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + } + len = t; + } + out_shift_with_cnt ("lsr %D0" CR_TAB + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); + return ""; +} + + +/* Output addition of register XOP[0] and compile time constant XOP[2]. + CODE == PLUS: perform addition by using ADD instructions or + CODE == MINUS: perform addition by using SUB instructions: + + XOP[0] = XOP[0] + XOP[2] + + Or perform addition/subtraction with register XOP[2] depending on CODE: + + XOP[0] = XOP[0] +/- XOP[2] + + If PLEN == NULL, print assembler instructions to perform the operation; + otherwise, set *PLEN to the length of the instruction sequence (in words) + printed with PLEN == NULL. XOP[3] is an 8-bit scratch register or NULL_RTX. + Set *PCC to effect on cc0 according to respective CC_* insn attribute. + + CODE_SAT == UNKNOWN: Perform ordinary, non-saturating operation. + CODE_SAT != UNKNOWN: Perform operation and saturate according to CODE_SAT. + If CODE_SAT != UNKNOWN then SIGN contains the sign of the summand resp. + the subtrahend in the original insn, provided it is a compile time constant. + In all other cases, SIGN is 0. + + If OUT_LABEL is true, print the final 0: label which is needed for + saturated addition / subtraction. The only case where OUT_LABEL = false + is useful is for saturated addition / subtraction performed during + fixed-point rounding, cf. `avr_out_round'. */ + +static void +avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, + enum rtx_code code_sat, int sign, bool out_label) +{ + /* MODE of the operation. */ + enum machine_mode mode = GET_MODE (xop[0]); + + /* INT_MODE of the same size. */ + enum machine_mode imode = int_mode_for_mode (mode); + + /* Number of bytes to operate on. 
*/ + int i, n_bytes = GET_MODE_SIZE (mode); + + /* Value (0..0xff) held in clobber register op[3] or -1 if unknown. */ + int clobber_val = -1; + + /* op[0]: 8-bit destination register + op[1]: 8-bit const int + op[2]: 8-bit scratch register */ + rtx op[3]; + + /* Started the operation? Before starting the operation we may skip + adding 0. This is no more true after the operation started because + carry must be taken into account. */ + bool started = false; + + /* Value to add. There are two ways to add VAL: R += VAL and R -= -VAL. */ + rtx xval = xop[2]; + + /* Output a BRVC instruction. Only needed with saturation. */ + bool out_brvc = true; + + if (plen) + *plen = 0; + + if (REG_P (xop[2])) + { + *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_SET_N; + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + op[0] = simplify_gen_subreg (QImode, xop[0], mode, i); + op[1] = simplify_gen_subreg (QImode, xop[2], mode, i); + + if (i == 0) + avr_asm_len (code == PLUS ? "add %0,%1" : "sub %0,%1", + op, plen, 1); + else + avr_asm_len (code == PLUS ? "adc %0,%1" : "sbc %0,%1", + op, plen, 1); + } + + if (reg_overlap_mentioned_p (xop[0], xop[2])) + { + gcc_assert (REGNO (xop[0]) == REGNO (xop[2])); + + if (MINUS == code) + return; + } + + goto saturate; + } + + /* Except in the case of ADIW with 16-bit register (see below) + addition does not set cc0 in a usable way. */ + + *pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER; + + if (CONST_FIXED_P (xval)) + xval = avr_to_int_mode (xval); + + /* Adding/Subtracting zero is a no-op. */ + + if (xval == const0_rtx) + { + *pcc = CC_NONE; + return; + } + + if (MINUS == code) + xval = simplify_unary_operation (NEG, imode, xval, imode); + + op[2] = xop[3]; + + if (SS_PLUS == code_sat && MINUS == code + && sign < 0 + && 0x80 == (INTVAL (simplify_gen_subreg (QImode, xval, imode, n_bytes-1)) + & GET_MODE_MASK (QImode))) + { + /* We compute x + 0x80 by means of SUB instructions. We negated the + constant subtrahend above and are left with x - (-128) so that we + need something like SUBI r,128 which does not exist because SUBI sets + V according to the sign of the subtrahend. Notice the only case + where this must be done is when NEG overflowed in case [2s] because + the V computation needs the right sign of the subtrahend. */ + + rtx msb = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1); + + avr_asm_len ("subi %0,128" CR_TAB + "brmi 0f", &msb, plen, 2); + out_brvc = false; + + goto saturate; + } + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xval, imode, i); + + /* 8-bit value to operate with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Registers R16..R31 can operate with immediate. */ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + op[0] = reg8; + op[1] = gen_int_mode (val8, QImode); + + /* To get usable cc0 no low-bytes must have been skipped. */ + + if (i && !started) + *pcc = CC_CLOBBER; + + if (!started + && i % 2 == 0 + && i + 2 <= n_bytes + && test_hard_reg_class (ADDW_REGS, reg8)) + { + rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i); + unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode); + + /* Registers R24, X, Y, Z can use ADIW/SBIW with constants < 64 + i.e. operate word-wise. */ + + if (val16 < 64) + { + if (val16 != 0) + { + started = true; + avr_asm_len (code == PLUS ? 
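+ /* ADIW for addition, SBIW for subtraction of this 16-bit chunk.  */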
"adiw %0,%1" : "sbiw %0,%1", + op, plen, 1); + + if (n_bytes == 2 && PLUS == code) + *pcc = CC_SET_ZN; + } + + i++; + continue; + } + } + + if (val8 == 0) + { + if (started) + avr_asm_len (code == PLUS + ? "adc %0,__zero_reg__" : "sbc %0,__zero_reg__", + op, plen, 1); + continue; + } + else if ((val8 == 1 || val8 == 0xff) + && UNKNOWN == code_sat + && !started + && i == n_bytes - 1) + { + avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0", + op, plen, 1); + break; + } + + switch (code) + { + case PLUS: + + gcc_assert (plen != NULL || (op[2] && REG_P (op[2]))); + + if (plen != NULL && UNKNOWN != code_sat) + { + /* This belongs to the x + 0x80 corner case. The code with + ADD instruction is not smaller, thus make this case + expensive so that the caller won't pick it. */ + + *plen += 10; + break; + } + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ? "adc %0,%2" : "add %0,%2", op, plen, 1); + + break; /* PLUS */ + + case MINUS: + + if (ld_reg_p) + avr_asm_len (started ? "sbci %0,%1" : "subi %0,%1", op, plen, 1); + else + { + gcc_assert (plen != NULL || REG_P (op[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ? "sbc %0,%2" : "sub %0,%2", op, plen, 1); + } + + break; /* MINUS */ + + default: + /* Unknown code */ + gcc_unreachable(); + } + + started = true; + + } /* for all sub-bytes */ + + saturate: + + if (UNKNOWN == code_sat) + return; + + *pcc = (int) CC_CLOBBER; + + /* Vanilla addition/subtraction is done. We are left with saturation. + + We have to compute A = A B where A is a register and + B is a register or a non-zero compile time constant CONST. + A is register class "r" if unsigned && B is REG. Otherwise, A is in "d". + B stands for the original operand $2 in INSN. In the case of B = CONST, + SIGN in { -1, 1 } is the sign of B. Otherwise, SIGN is 0. + + CODE is the instruction flavor we use in the asm sequence to perform . + + + unsigned + operation | code | sat if | b is | sat value | case + -----------------+-------+----------+--------------+-----------+------- + + as a + b | add | C == 1 | const, reg | u+ = 0xff | [1u] + + as a - (-b) | sub | C == 0 | const | u+ = 0xff | [2u] + - as a - b | sub | C == 1 | const, reg | u- = 0 | [3u] + - as a + (-b) | add | C == 0 | const | u- = 0 | [4u] + + + signed + operation | code | sat if | b is | sat value | case + -----------------+-------+----------+--------------+-----------+------- + + as a + b | add | V == 1 | const, reg | s+ | [1s] + + as a - (-b) | sub | V == 1 | const | s+ | [2s] + - as a - b | sub | V == 1 | const, reg | s- | [3s] + - as a + (-b) | add | V == 1 | const | s- | [4s] + + s+ = b < 0 ? -0x80 : 0x7f + s- = b < 0 ? 0x7f : -0x80 + + The cases a - b actually perform a - (-(-b)) if B is CONST. + */ + + op[0] = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1); + op[1] = n_bytes > 1 + ? 
simplify_gen_subreg (QImode, xop[0], mode, n_bytes-2) + : NULL_RTX; + + bool need_copy = true; + int len_call = 1 + AVR_HAVE_JMP_CALL; + + switch (code_sat) + { + default: + gcc_unreachable(); + + case SS_PLUS: + case SS_MINUS: + + if (out_brvc) + avr_asm_len ("brvc 0f", op, plen, 1); + + if (reg_overlap_mentioned_p (xop[0], xop[2])) + { + /* [1s,reg] */ + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x7f" CR_TAB + "adc %0,__zero_reg__", op, plen, 2); + else + avr_asm_len ("ldi %0,0x7f" CR_TAB + "ldi %1,0xff" CR_TAB + "adc %1,__zero_reg__" CR_TAB + "adc %0,__zero_reg__", op, plen, 4); + } + else if (sign == 0 && PLUS == code) + { + /* [1s,reg] */ + + op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1); + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x80" CR_TAB + "sbrs %2,7" CR_TAB + "dec %0", op, plen, 3); + else + avr_asm_len ("ldi %0,0x80" CR_TAB + "cp %2,%0" CR_TAB + "sbc %1,%1" CR_TAB + "sbci %0,0", op, plen, 4); + } + else if (sign == 0 && MINUS == code) + { + /* [3s,reg] */ + + op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1); + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x7f" CR_TAB + "sbrs %2,7" CR_TAB + "inc %0", op, plen, 3); + else + avr_asm_len ("ldi %0,0x7f" CR_TAB + "cp %0,%2" CR_TAB + "sbc %1,%1" CR_TAB + "sbci %0,-1", op, plen, 4); + } + else if ((sign < 0) ^ (SS_MINUS == code_sat)) + { + /* [1s,const,B < 0] [2s,B < 0] */ + /* [3s,const,B > 0] [4s,B > 0] */ + + if (n_bytes == 8) + { + avr_asm_len ("%~call __clr_8", op, plen, len_call); + need_copy = false; + } + + avr_asm_len ("ldi %0,0x80", op, plen, 1); + if (n_bytes > 1 && need_copy) + avr_asm_len ("clr %1", op, plen, 1); + } + else if ((sign > 0) ^ (SS_MINUS == code_sat)) + { + /* [1s,const,B > 0] [2s,B > 0] */ + /* [3s,const,B < 0] [4s,B < 0] */ + + if (n_bytes == 8) + { + avr_asm_len ("sec" CR_TAB + "%~call __sbc_8", op, plen, 1 + len_call); + need_copy = false; + } + + avr_asm_len ("ldi %0,0x7f", op, plen, 1); + if (n_bytes > 1 && need_copy) + avr_asm_len ("ldi %1,0xff", op, plen, 1); + } + else + gcc_unreachable(); + + break; + + case US_PLUS: + /* [1u] : [2u] */ + + avr_asm_len (PLUS == code ? "brcc 0f" : "brcs 0f", op, plen, 1); + + if (n_bytes == 8) + { + if (MINUS == code) + avr_asm_len ("sec", op, plen, 1); + avr_asm_len ("%~call __sbc_8", op, plen, len_call); + + need_copy = false; + } + else + { + if (MINUS == code && !test_hard_reg_class (LD_REGS, op[0])) + avr_asm_len ("sec" CR_TAB "sbc %0,%0", op, plen, 2); + else + avr_asm_len (PLUS == code ? "sbc %0,%0" : "ldi %0,0xff", + op, plen, 1); + } + break; /* US_PLUS */ + + case US_MINUS: + /* [4u] : [3u] */ + + avr_asm_len (PLUS == code ? "brcs 0f" : "brcc 0f", op, plen, 1); + + if (n_bytes == 8) + { + avr_asm_len ("%~call __clr_8", op, plen, len_call); + need_copy = false; + } + else + avr_asm_len ("clr %0", op, plen, 1); + + break; + } + + /* We set the MSB in the unsigned case and the 2 MSBs in the signed case. + Now copy the right value to the LSBs. 
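+ MOVW propagates the value pair-wise where available; otherwise it is copied byte by byte.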
*/ + + if (need_copy && n_bytes > 1) + { + if (US_MINUS == code_sat || US_PLUS == code_sat) + { + avr_asm_len ("mov %1,%0", op, plen, 1); + + if (n_bytes > 2) + { + op[0] = xop[0]; + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %0,%1", op, plen, 1); + else + avr_asm_len ("mov %A0,%1" CR_TAB + "mov %B0,%1", op, plen, 2); + } + } + else if (n_bytes > 2) + { + op[0] = xop[0]; + avr_asm_len ("mov %A0,%1" CR_TAB + "mov %B0,%1", op, plen, 2); + } + } + + if (need_copy && n_bytes == 8) + { + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %r0+2,%0" CR_TAB + "movw %r0+4,%0", xop, plen, 2); + else + avr_asm_len ("mov %r0+2,%0" CR_TAB + "mov %r0+3,%0" CR_TAB + "mov %r0+4,%0" CR_TAB + "mov %r0+5,%0", xop, plen, 4); + } + + if (out_label) + avr_asm_len ("0:", op, plen, 0); +} + + +/* Output addition/subtraction of register XOP[0] and a constant XOP[2] that + is ont a compile-time constant: + + XOP[0] = XOP[0] +/- XOP[2] + + This is a helper for the function below. The only insns that need this + are additions/subtraction for pointer modes, i.e. HImode and PSImode. */ + +static const char* +avr_out_plus_symbol (rtx *xop, enum rtx_code code, int *plen, int *pcc) +{ + enum machine_mode mode = GET_MODE (xop[0]); + + /* Only pointer modes want to add symbols. */ + + gcc_assert (mode == HImode || mode == PSImode); + + *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_SET_N; + + avr_asm_len (PLUS == code + ? "subi %A0,lo8(-(%2))" CR_TAB "sbci %B0,hi8(-(%2))" + : "subi %A0,lo8(%2)" CR_TAB "sbci %B0,hi8(%2)", + xop, plen, -2); + + if (PSImode == mode) + avr_asm_len (PLUS == code + ? "sbci %C0,hlo8(-(%2))" + : "sbci %C0,hlo8(%2)", xop, plen, 1); + return ""; +} + + +/* Prepare operands of addition/subtraction to be used with avr_out_plus_1. + + INSN is a single_set insn or an insn pattern with a binary operation as + SET_SRC that is one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS. + + XOP are the operands of INSN. In the case of 64-bit operations with + constant XOP[] has just one element: The summand/subtrahend in XOP[0]. + The non-saturating insns up to 32 bits may or may not supply a "d" class + scratch as XOP[3]. + + If PLEN == NULL output the instructions. + If PLEN != NULL set *PLEN to the length of the sequence in words. + + PCC is a pointer to store the instructions' effect on cc0. + PCC may be NULL. + + PLEN and PCC default to NULL. + + OUT_LABEL defaults to TRUE. For a description, see AVR_OUT_PLUS_1. + + Return "" */ + +const char* +avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label) +{ + int cc_plus, cc_minus, cc_dummy; + int len_plus, len_minus; + rtx op[4]; + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + rtx xdest = SET_DEST (xpattern); + enum machine_mode mode = GET_MODE (xdest); + enum machine_mode imode = int_mode_for_mode (mode); + int n_bytes = GET_MODE_SIZE (mode); + enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern)); + enum rtx_code code + = (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat + ? PLUS : MINUS); + + if (!pcc) + pcc = &cc_dummy; + + /* PLUS and MINUS don't saturate: Use modular wrap-around. 
*/ + + if (PLUS == code_sat || MINUS == code_sat) + code_sat = UNKNOWN; + + if (n_bytes <= 4 && REG_P (xop[2])) + { + avr_out_plus_1 (xop, plen, code, pcc, code_sat, 0, out_label); + return ""; + } + + if (8 == n_bytes) + { + op[0] = gen_rtx_REG (DImode, ACC_A); + op[1] = gen_rtx_REG (DImode, ACC_A); + op[2] = avr_to_int_mode (xop[0]); + } + else + { + if (!REG_P (xop[2]) + && !CONST_INT_P (xop[2]) + && !CONST_FIXED_P (xop[2])) + { + return avr_out_plus_symbol (xop, code, plen, pcc); + } + + op[0] = avr_to_int_mode (xop[0]); + op[1] = avr_to_int_mode (xop[1]); + op[2] = avr_to_int_mode (xop[2]); + } + + /* Saturations and 64-bit operations don't have a clobber operand. + For the other cases, the caller will provide a proper XOP[3]. */ + + xpattern = INSN_P (insn) ? PATTERN (insn) : insn; + op[3] = PARALLEL == GET_CODE (xpattern) ? xop[3] : NULL_RTX; + + /* Saturation will need the sign of the original operand. */ + + rtx xmsb = simplify_gen_subreg (QImode, op[2], imode, n_bytes-1); + int sign = INTVAL (xmsb) < 0 ? -1 : 1; + + /* If we subtract and the subtrahend is a constant, then negate it + so that avr_out_plus_1 can be used. */ + + if (MINUS == code) + op[2] = simplify_unary_operation (NEG, imode, op[2], imode); + + /* Work out the shortest sequence. */ + + avr_out_plus_1 (op, &len_minus, MINUS, &cc_minus, code_sat, sign, out_label); + avr_out_plus_1 (op, &len_plus, PLUS, &cc_plus, code_sat, sign, out_label); + + if (plen) + { + *plen = (len_minus <= len_plus) ? len_minus : len_plus; + *pcc = (len_minus <= len_plus) ? cc_minus : cc_plus; + } + else if (len_minus <= len_plus) + avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign, out_label); + else + avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign, out_label); + + return ""; +} + + +/* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile + time constant XOP[2]: + + XOP[0] = XOP[0] XOP[2] + + and return "". If PLEN == NULL, print assembler instructions to perform the + operation; otherwise, set *PLEN to the length of the instruction sequence + (in words) printed with PLEN == NULL. XOP[3] is either an 8-bit clobber + register or SCRATCH if no clobber register is needed for the operation. + INSN is an INSN_P or a pattern of an insn. */ + +const char* +avr_out_bitop (rtx insn, rtx *xop, int *plen) +{ + /* CODE and MODE of the operation. */ + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + enum rtx_code code = GET_CODE (SET_SRC (xpattern)); + enum machine_mode mode = GET_MODE (xop[0]); + + /* Number of bytes to operate on. */ + int i, n_bytes = GET_MODE_SIZE (mode); + + /* Value of T-flag (0 or 1) or -1 if unknow. */ + int set_t = -1; + + /* Value (0..0xff) held in clobber register op[3] or -1 if unknown. */ + int clobber_val = -1; + + /* op[0]: 8-bit destination register + op[1]: 8-bit const int + op[2]: 8-bit clobber register or SCRATCH + op[3]: 8-bit register containing 0xff or NULL_RTX */ + rtx op[4]; + + op[2] = xop[3]; + op[3] = NULL_RTX; + + if (plen) + *plen = 0; + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xop[2], mode, i); + + /* 8-bit value to operate with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Number of bits set in the current byte of the constant. */ + int pop8 = avr_popcount (val8); + + /* Registers R16..R31 can operate with immediate. 
*/ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + op[0] = reg8; + op[1] = GEN_INT (val8); + + switch (code) + { + case IOR: + + if (0 == pop8) + continue; + else if (ld_reg_p) + avr_asm_len ("ori %0,%1", op, plen, 1); + else if (1 == pop8) + { + if (set_t != 1) + avr_asm_len ("set", op, plen, 1); + set_t = 1; + + op[1] = GEN_INT (exact_log2 (val8)); + avr_asm_len ("bld %0,%1", op, plen, 1); + } + else if (8 == pop8) + { + if (op[3] != NULL_RTX) + avr_asm_len ("mov %0,%3", op, plen, 1); + else + avr_asm_len ("clr %0" CR_TAB + "dec %0", op, plen, 2); + + op[3] = op[0]; + } + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("or %0,%2", op, plen, 1); + } + + continue; /* IOR */ + + case AND: + + if (8 == pop8) + continue; + else if (0 == pop8) + avr_asm_len ("clr %0", op, plen, 1); + else if (ld_reg_p) + avr_asm_len ("andi %0,%1", op, plen, 1); + else if (7 == pop8) + { + if (set_t != 0) + avr_asm_len ("clt", op, plen, 1); + set_t = 0; + + op[1] = GEN_INT (exact_log2 (GET_MODE_MASK (QImode) & ~val8)); + avr_asm_len ("bld %0,%1", op, plen, 1); + } + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("and %0,%2", op, plen, 1); + } + + continue; /* AND */ + + case XOR: + + if (0 == pop8) + continue; + else if (8 == pop8) + avr_asm_len ("com %0", op, plen, 1); + else if (ld_reg_p && val8 == (1 << 7)) + avr_asm_len ("subi %0,%1", op, plen, 1); + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("eor %0,%2", op, plen, 1); + } + + continue; /* XOR */ + + default: + /* Unknown rtx_code */ + gcc_unreachable(); + } + } /* for all sub-bytes */ + + return ""; +} + + +/* PLEN == NULL: Output code to add CONST_INT OP[0] to SP. + PLEN != NULL: Set *PLEN to the length of that sequence. + Return "". */ + +const char* +avr_out_addto_sp (rtx *op, int *plen) +{ + int pc_len = AVR_2_BYTE_PC ? 2 : 3; + int addend = INTVAL (op[0]); + + if (plen) + *plen = 0; + + if (addend < 0) + { + if (flag_verbose_asm || flag_print_asm_name) + avr_asm_len (ASM_COMMENT_START "SP -= %n0", op, plen, 0); + + while (addend <= -pc_len) + { + addend += pc_len; + avr_asm_len ("rcall .", op, plen, 1); + } + + while (addend++ < 0) + avr_asm_len ("push __zero_reg__", op, plen, 1); + } + else if (addend > 0) + { + if (flag_verbose_asm || flag_print_asm_name) + avr_asm_len (ASM_COMMENT_START "SP += %0", op, plen, 0); + + while (addend-- > 0) + avr_asm_len ("pop __tmp_reg__", op, plen, 1); + } + + return ""; +} + + +/* Outputs instructions needed for fixed point type conversion. + This includes converting between any fixed point type, as well + as converting to any integer type. Conversion between integer + types is not supported. + + Converting signed fractional types requires a bit shift if converting + to or from any unsigned fractional type because the decimal place is + shifted by 1 bit. When the destination is a signed fractional, the sign + is stored in either the carry or T bit. */ + +const char* +avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) +{ + size_t i; + rtx xop[6]; + RTX_CODE shift = UNKNOWN; + bool sign_in_carry = false; + bool msb_in_carry = false; + bool lsb_in_tmp_reg = false; + bool lsb_in_carry = false; + bool frac_rounded = false; + const char *code_ashift = "lsl %0"; + + +#define MAY_CLOBBER(RR) \ + /* Shorthand used below. 
*/ \ + ((sign_bytes \ + && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \ + || (offset && IN_RANGE (RR, dest.regno, dest.regno_msb)) \ + || (reg_unused_after (insn, all_regs_rtx[RR]) \ + && !IN_RANGE (RR, dest.regno, dest.regno_msb))) + + struct + { + /* bytes : Length of operand in bytes. + ibyte : Length of integral part in bytes. + fbyte, fbit : Length of fractional part in bytes, bits. */ + + bool sbit; + unsigned fbit, bytes, ibyte, fbyte; + unsigned regno, regno_msb; + } dest, src, *val[2] = { &dest, &src }; + + if (plen) + *plen = 0; + + /* Step 0: Determine information on source and destination operand we + ====== will need in the remainder. */ + + for (i = 0; i < sizeof (val) / sizeof (*val); i++) + { + enum machine_mode mode; + + xop[i] = operands[i]; + + mode = GET_MODE (xop[i]); + + val[i]->bytes = GET_MODE_SIZE (mode); + val[i]->regno = REGNO (xop[i]); + val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1; + + if (SCALAR_INT_MODE_P (mode)) + { + val[i]->sbit = intsigned; + val[i]->fbit = 0; + } + else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode)) + { + val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode); + val[i]->fbit = GET_MODE_FBIT (mode); + } + else + fatal_insn ("unsupported fixed-point conversion", insn); + + val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT; + val[i]->ibyte = val[i]->bytes - val[i]->fbyte; + } + + // Byte offset of the decimal point taking into account different place + // of the decimal point in input and output and different register numbers + // of input and output. + int offset = dest.regno - src.regno + dest.fbyte - src.fbyte; + + // Number of destination bytes that will come from sign / zero extension. + int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte); + + // Number of bytes at the low end to be filled with zeros. + int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte); + + // Do we have a 16-Bit register that is cleared? + rtx clrw = NULL_RTX; + + bool sign_extend = src.sbit && sign_bytes; + + if (0 == dest.fbit % 8 && 7 == src.fbit % 8) + shift = ASHIFT; + else if (7 == dest.fbit % 8 && 0 == src.fbit % 8) + shift = ASHIFTRT; + else if (dest.fbit % 8 == src.fbit % 8) + shift = UNKNOWN; + else + gcc_unreachable(); + + /* If we need to round the fraction part, we might need to save/round it + before clobbering any of it in Step 1. Also, we might to want to do + the rounding now to make use of LD_REGS. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + bool overlap + = (src.regno <= + (offset ? 
dest.regno_msb - sign_bytes : dest.regno + zero_bytes - 1) + && dest.regno - offset -1 >= dest.regno); + unsigned s0 = dest.regno - offset -1; + bool use_src = true; + unsigned sn; + unsigned copied_msb = src.regno_msb; + bool have_carry = false; + + if (src.ibyte > dest.ibyte) + copied_msb -= src.ibyte - dest.ibyte; + + for (sn = s0; sn <= copied_msb; sn++) + if (!IN_RANGE (sn, dest.regno, dest.regno_msb) + && !reg_unused_after (insn, all_regs_rtx[sn])) + use_src = false; + if (use_src && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0)) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + sn = src.regno; + if (sn < s0) + { + if (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], sn)) + avr_asm_len ("cpi %0,1", &all_regs_rtx[sn], plen, 1); + else + avr_asm_len ("sec" CR_TAB "cpc %0,__zero_reg__", + &all_regs_rtx[sn], plen, 2); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + avr_asm_len (have_carry ? "sbci %0,128" : "subi %0,129", + &all_regs_rtx[s0], plen, 1); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("sbci %0,255", &all_regs_rtx[sn], plen, 1); + avr_asm_len ("\n0:", NULL, plen, 0); + frac_rounded = true; + } + else if (use_src && overlap) + { + avr_asm_len ("clr __tmp_reg__" CR_TAB + "sbrc %1,0" CR_TAB "dec __tmp_reg__", xop, plen, 1); + sn = src.regno; + if (sn < s0) + { + avr_asm_len ("add %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("adc %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __tmp_reg__,7" CR_TAB + "adc %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("lsr __tmp_reg" CR_TAB "add %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 2); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + frac_rounded = true; + } + else if (overlap) + { + bool use_src + = (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0) + && (IN_RANGE (s0, dest.regno, dest.regno_msb) + || reg_unused_after (insn, all_regs_rtx[s0]))); + xop[2] = all_regs_rtx[s0]; + unsigned sn = src.regno; + if (!use_src || sn == s0) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + /* We need to consider to-be-discarded bits + if the value is negative. */ + if (sn < s0) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + /* Test to-be-discarded bytes for any nozero bits. + ??? Could use OR or SBIW to test two registers at once. */ + if (sn < s0) + avr_asm_len ("cp %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + /* Set bit 0 in __tmp_reg__ if any of the lower bits was set. */ + if (use_src) + avr_asm_len ("breq 0f" CR_TAB + "ori %2,1" "\n0:\t" "mov __tmp_reg__,%2", + xop, plen, 3); + else + avr_asm_len ("breq 0f" CR_TAB + "set" CR_TAB "bld __tmp_reg__,0\n0:", + xop, plen, 3); + } + lsb_in_tmp_reg = true; + } + } + + /* Step 1: Clear bytes at the low end and copy payload bits from source + ====== to destination. */ + + int step = offset < 0 ? 1 : -1; + unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb; + + // We cleared at least that number of registers. 
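+ // Once two suitably aligned destination bytes have been cleared, the
+ // even-numbered register pair is remembered in clrw so that, on devices
+ // with MOVW, further zero bytes can be produced two at a time, e.g.
+ //     clr r26 / clr r27 / movw r24,r26
+ // instead of four CLRs (the register numbers are only an illustration).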
+ int clr_n = 0; + + for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step) + { + // Next regno of destination is needed for MOVW + unsigned d1 = d0 + step; + + // Current and next regno of source + signed s0 = d0 - offset; + signed s1 = s0 + step; + + // Must current resp. next regno be CLRed? This applies to the low + // bytes of the destination that have no associated source bytes. + bool clr0 = s0 < (signed) src.regno; + bool clr1 = s1 < (signed) src.regno && d1 >= dest.regno; + + // First gather what code to emit (if any) and additional step to + // apply if a MOVW is in use. xop[2] is destination rtx and xop[3] + // is the source rtx for the current loop iteration. + const char *code = NULL; + int stepw = 0; + + if (clr0) + { + if (AVR_HAVE_MOVW && clr1 && clrw) + { + xop[2] = all_regs_rtx[d0 & ~1]; + xop[3] = clrw; + code = "movw %2,%3"; + stepw = step; + } + else + { + xop[2] = all_regs_rtx[d0]; + code = "clr %2"; + + if (++clr_n >= 2 + && !clrw + && d0 % 2 == (step > 0)) + { + clrw = all_regs_rtx[d0 & ~1]; + } + } + } + else if (offset && s0 <= (signed) src.regno_msb) + { + int movw = AVR_HAVE_MOVW && offset % 2 == 0 + && d0 % 2 == (offset > 0) + && d1 <= dest.regno_msb && d1 >= dest.regno + && s1 <= (signed) src.regno_msb && s1 >= (signed) src.regno; + + xop[2] = all_regs_rtx[d0 & ~movw]; + xop[3] = all_regs_rtx[s0 & ~movw]; + code = movw ? "movw %2,%3" : "mov %2,%3"; + stepw = step * movw; + } + + if (code) + { + if (sign_extend && shift != ASHIFT && !sign_in_carry + && (d0 == src.regno_msb || d0 + stepw == src.regno_msb)) + { + /* We are going to override the sign bit. If we sign-extend, + store the sign in the Carry flag. This is not needed if + the destination will be ASHIFT is the remainder because + the ASHIFT will set Carry without extra instruction. */ + + avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1); + sign_in_carry = true; + } + + unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1; + + if (!sign_extend && shift == ASHIFTRT && !msb_in_carry + && src.ibyte > dest.ibyte + && (d0 == src_msb || d0 + stepw == src_msb)) + { + /* We are going to override the MSB. If we shift right, + store the MSB in the Carry flag. This is only needed if + we don't sign-extend becaue with sign-extension the MSB + (the sign) will be produced by the sign extension. */ + + avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1); + msb_in_carry = true; + } + + unsigned src_lsb = dest.regno - offset -1; + + if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry + && !lsb_in_tmp_reg + && (d0 == src_lsb || d0 + stepw == src_lsb)) + { + /* We are going to override the new LSB; store it into carry. */ + + avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1); + code_ashift = "rol %0"; + lsb_in_carry = true; + } + + avr_asm_len (code, xop, plen, 1); + d0 += stepw; + } + } + + /* Step 2: Shift destination left by 1 bit position. This might be needed + ====== for signed input and unsigned output. */ + + if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry) + { + unsigned s0 = dest.regno - offset -1; + + /* n1169 4.1.4 says: + "Conversions from a fixed-point to an integer type round toward zero." + Hence, converting a fract type to integer only gives a non-zero result + for -1. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_FRACT_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + gcc_assert (s0 == src.regno_msb); + /* Check if the input is -1. We do that by checking if negating + the input causes an integer overflow. 
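+ For an N-bit signed fract, -1.0 is the most negative two's complement
+ pattern (0x80...00), and the 0 - X computed by the CP/CPC chain below
+ overflows (sets V) exactly for that pattern, so the BRVS singles out -1
+ while every other input falls through to the CLC.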
*/ + unsigned sn = src.regno; + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + while (sn <= s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + + /* Overflow goes with set carry. Clear carry otherwise. */ + avr_asm_len ("brvs 0f" CR_TAB "clc\n0:", NULL, plen, 2); + } + /* Likewise, when converting from accumulator types to integer, we + need to round up negative values. */ + else if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC + && !frac_rounded) + { + bool have_carry = false; + + xop[2] = all_regs_rtx[s0]; + if (!lsb_in_tmp_reg && !MAY_CLOBBER (s0)) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + if (!lsb_in_tmp_reg) + { + unsigned sn = src.regno; + if (sn < s0) + { + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn], + plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn], plen, 1); + lsb_in_tmp_reg = !MAY_CLOBBER (s0); + } + /* Add in C and the rounding value 127. */ + /* If the destination msb is a sign byte, and in LD_REGS, + grab it as a temporary. */ + if (sign_bytes + && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], + dest.regno_msb)) + { + xop[3] = all_regs_rtx[dest.regno_msb]; + avr_asm_len ("ldi %3,127", xop, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg ? "adc __tmp_reg__,%3" + : have_carry ? "adc %2,%3" + : lsb_in_tmp_reg ? "add __tmp_reg__,%3" + : "add %2,%3"), + xop, plen, 1); + } + else + { + /* Fall back to use __zero_reg__ as a temporary. */ + avr_asm_len ("dec __zero_reg__", NULL, plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __zero_reg__,7", NULL, plen, 2); + else + avr_asm_len ("lsr __zero_reg__", NULL, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg + ? "adc __tmp_reg__,__zero_reg__" + : have_carry ? "adc %2,__zero_reg__" + : lsb_in_tmp_reg ? "add __tmp_reg__,__zero_reg__" + : "add %2,__zero_reg__"), + xop, plen, 1); + avr_asm_len ("eor __zero_reg__,__zero_reg__", NULL, plen, 1); + } + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[d0], plen, 1); + avr_asm_len (lsb_in_tmp_reg + ? "\n0:\t" "lsl __tmp_reg__" : "\n0:\t" "lsl %2", + xop, plen, 1); + } + else if (MAY_CLOBBER (s0)) + avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + code_ashift = "rol %0"; + lsb_in_carry = true; + } + + if (shift == ASHIFT) + { + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + { + avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1); + code_ashift = "rol %0"; + } + + lsb_in_carry = false; + sign_in_carry = true; + } + + /* Step 4a: Store MSB in carry if we don't already have it or will produce + ======= it in sign-extension below. */ + + if (!sign_extend && shift == ASHIFTRT && !msb_in_carry + && src.ibyte > dest.ibyte) + { + unsigned s0 = dest.regno_msb - sign_bytes - offset + 1; + + if (MAY_CLOBBER (s0)) + avr_asm_len ("lsr %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + msb_in_carry = true; + } + + /* Step 3: Sign-extend or zero-extend the destination as needed. 
+ ====== */ + + if (sign_extend && !sign_in_carry) + { + unsigned s0 = src.regno_msb; + + if (MAY_CLOBBER (s0)) + avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + sign_in_carry = true; + } + + gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1); + + unsigned copies = 0; + rtx movw = sign_extend ? NULL_RTX : clrw; + + for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++) + { + if (AVR_HAVE_MOVW && movw + && d0 % 2 == 0 && d0 + 1 <= dest.regno_msb) + { + xop[2] = all_regs_rtx[d0]; + xop[3] = movw; + avr_asm_len ("movw %2,%3", xop, plen, 1); + d0++; + } + else + { + avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0", + &all_regs_rtx[d0], plen, 1); + + if (++copies >= 2 && !movw && d0 % 2 == 1) + movw = all_regs_rtx[d0-1]; + } + } /* for */ + + + /* Step 4: Right shift the destination. This might be needed for + ====== conversions from unsigned to signed. */ + + if (shift == ASHIFTRT) + { + const char *code_ashiftrt = "lsr %0"; + + if (sign_extend || msb_in_carry) + code_ashiftrt = "ror %0"; + + if (src.sbit && src.ibyte == dest.ibyte) + code_ashiftrt = "asr %0"; + + for (d0 = dest.regno_msb - sign_bytes; + d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--) + { + avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1); + code_ashiftrt = "ror %0"; + } + } + +#undef MAY_CLOBBER + + return ""; +} + + +/* Output fixed-point rounding. XOP[0] = XOP[1] is the operand to round. + XOP[2] is the rounding point, a CONST_INT. The function prints the + instruction sequence if PLEN = NULL and computes the length in words + of the sequence if PLEN != NULL. Most of this function deals with + preparing operands for calls to `avr_out_plus' and `avr_out_bitop'. */ + +const char* +avr_out_round (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen) +{ + enum machine_mode mode = GET_MODE (xop[0]); + enum machine_mode imode = int_mode_for_mode (mode); + // The smallest fractional bit not cleared by the rounding is 2^(-RP). + int fbit = (int) GET_MODE_FBIT (mode); + double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (xop[2])); + // Lengths of PLUS and AND parts. + int len_add = 0, *plen_add = plen ? &len_add : NULL; + int len_and = 0, *plen_and = plen ? &len_and : NULL; + + // Add-Saturate 1/2 * 2^(-RP). Don't print the label "0:" when printing + // the saturated addition so that we can emit the "rjmp 1f" before the + // "0:" below. + + rtx xadd = const_fixed_from_double_int (i_add, mode); + rtx xpattern, xsrc, op[4]; + + xsrc = SIGNED_FIXED_POINT_MODE_P (mode) + ? gen_rtx_SS_PLUS (mode, xop[1], xadd) + : gen_rtx_US_PLUS (mode, xop[1], xadd); + xpattern = gen_rtx_SET (VOIDmode, xop[0], xsrc); + + op[0] = xop[0]; + op[1] = xop[1]; + op[2] = xadd; + avr_out_plus (xpattern, op, plen_add, NULL, false /* Don't print "0:" */); + + avr_asm_len ("rjmp 1f" CR_TAB + "0:", NULL, plen_add, 1); + + // Keep all bits from RP and higher: ... 2^(-RP) + // Clear all bits from RP+1 and lower: 2^(-RP-1) ... 
+ // Rounding point ^^^^^^^ + // Added above ^^^^^^^^^ + rtx xreg = simplify_gen_subreg (imode, xop[0], mode, 0); + rtx xmask = immed_double_int_const (-i_add - i_add, imode); + + xpattern = gen_rtx_SET (VOIDmode, xreg, gen_rtx_AND (imode, xreg, xmask)); + + op[0] = xreg; + op[1] = xreg; + op[2] = xmask; + op[3] = gen_rtx_SCRATCH (QImode); + avr_out_bitop (xpattern, op, plen_and); + avr_asm_len ("1:", NULL, plen, 0); + + if (plen) + *plen = len_add + len_and; + + return ""; +} + + +/* Create RTL split patterns for byte sized rotate expressions. This + produces a series of move instructions and considers overlap situations. + Overlapping non-HImode operands need a scratch register. */ + +bool +avr_rotate_bytes (rtx operands[]) +{ + int i, j; + enum machine_mode mode = GET_MODE (operands[0]); + bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); + bool same_reg = rtx_equal_p (operands[0], operands[1]); + int num = INTVAL (operands[2]); + rtx scratch = operands[3]; + /* Work out if byte or word move is needed. Odd byte rotates need QImode. + Word move if no scratch is needed, otherwise use size of scratch. */ + enum machine_mode move_mode = QImode; + int move_size, offset, size; + + if (num & 0xf) + move_mode = QImode; + else if ((mode == SImode && !same_reg) || !overlapped) + move_mode = HImode; + else + move_mode = GET_MODE (scratch); + + /* Force DI rotate to use QI moves since other DI moves are currently split + into QI moves so forward propagation works better. */ + if (mode == DImode) + move_mode = QImode; + /* Make scratch smaller if needed. */ + if (SCRATCH != GET_CODE (scratch) + && HImode == GET_MODE (scratch) + && QImode == move_mode) + scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); + + move_size = GET_MODE_SIZE (move_mode); + /* Number of bytes/words to rotate. */ + offset = (num >> 3) / move_size; + /* Number of moves needed. */ + size = GET_MODE_SIZE (mode) / move_size; + /* Himode byte swap is special case to avoid a scratch register. */ + if (mode == HImode && same_reg) + { + /* HImode byte swap, using xor. This is as quick as using scratch. */ + rtx src, dst; + src = simplify_gen_subreg (move_mode, operands[1], mode, 0); + dst = simplify_gen_subreg (move_mode, operands[0], mode, 1); + if (!rtx_equal_p (dst, src)) + { + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + emit_move_insn (src, gen_rtx_XOR (QImode, src, dst)); + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + } + } + else + { +#define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */ + /* Create linked list of moves to determine move order. */ + struct { + rtx src, dst; + int links; + } move[MAX_SIZE + 8]; + int blocked, moves; + + gcc_assert (size <= MAX_SIZE); + /* Generate list of subreg moves. */ + for (i = 0; i < size; i++) + { + int from = i; + int to = (from + offset) % size; + move[i].src = simplify_gen_subreg (move_mode, operands[1], + mode, from * move_size); + move[i].dst = simplify_gen_subreg (move_mode, operands[0], + mode, to * move_size); + move[i].links = -1; + } + /* Mark dependence where a dst of one move is the src of another move. + The first move is a conflict as it must wait until second is + performed. We ignore moves to self - we catch this later. */ + if (overlapped) + for (i = 0; i < size; i++) + if (reg_overlap_mentioned_p (move[i].dst, operands[1])) + for (j = 0; j < size; j++) + if (j != i && rtx_equal_p (move[j].src, move[i].dst)) + { + /* The dst of move i is the src of move j. 
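+ For instance, rotating a 4-byte register in place by 8 bits gives the
+ cycle b0->b1->b2->b3->b0, where every move's destination is some other
+ move's source; such a cycle can only be resolved by the scratch-register
+ handling further below.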
*/ + move[i].links = j; + break; + } + + blocked = -1; + moves = 0; + /* Go through move list and perform non-conflicting moves. As each + non-overlapping move is made, it may remove other conflicts + so the process is repeated until no conflicts remain. */ + do + { + blocked = -1; + moves = 0; + /* Emit move where dst is not also a src or we have used that + src already. */ + for (i = 0; i < size; i++) + if (move[i].src != NULL_RTX) + { + if (move[i].links == -1 + || move[move[i].links].src == NULL_RTX) + { + moves++; + /* Ignore NOP moves to self. */ + if (!rtx_equal_p (move[i].dst, move[i].src)) + emit_move_insn (move[i].dst, move[i].src); + + /* Remove conflict from list. */ + move[i].src = NULL_RTX; + } + else + blocked = i; + } + + /* Check for deadlock. This is when no moves occurred and we have + at least one blocked move. */ + if (moves == 0 && blocked != -1) + { + /* Need to use scratch register to break deadlock. + Add move to put dst of blocked move into scratch. + When this move occurs, it will break chain deadlock. + The scratch register is substituted for real move. */ + + gcc_assert (SCRATCH != GET_CODE (scratch)); + + move[size].src = move[blocked].dst; + move[size].dst = scratch; + /* Scratch move is never blocked. */ + move[size].links = -1; + /* Make sure we have valid link. */ + gcc_assert (move[blocked].links != -1); + /* Replace src of blocking move with scratch reg. */ + move[move[blocked].links].src = scratch; + /* Make dependent on scratch move occurring. */ + move[blocked].links = size; + size=size+1; + } + } + while (blocked != -1); + } + return true; +} + + +/* Worker function for `ADJUST_INSN_LENGTH'. */ +/* Modifies the length assigned to instruction INSN + LEN is the initially computed length of the insn. */ + +int +avr_adjust_insn_length (rtx insn, int len) +{ + rtx *op = recog_data.operand; + enum attr_adjust_len adjust_len; + + /* Some complex insns don't need length adjustment and therefore + the length need not/must not be adjusted for these insns. + It is easier to state this in an insn attribute "adjust_len" than + to clutter up code here... */ + + if (-1 == recog_memoized (insn)) + { + return len; + } + + /* Read from insn attribute "adjust_len" if/how length is to be adjusted. */ + + adjust_len = get_attr_adjust_len (insn); + + if (adjust_len == ADJUST_LEN_NO) + { + /* Nothing to adjust: The length from attribute "length" is fine. + This is the default. */ + + return len; + } + + /* Extract insn's operands. */ + + extract_constrain_insn_cached (insn); + + /* Dispatch to right function. 
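+ The cases below call the matching output worker with a non-NULL length
+ pointer (&len), so the worker only computes the sequence length in words
+ instead of printing assembly; see the PLEN convention documented above
+ avr_out_bitop.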
*/ + + switch (adjust_len) + { + case ADJUST_LEN_RELOAD_IN16: output_reload_inhi (op, op[2], &len); break; + case ADJUST_LEN_RELOAD_IN24: avr_out_reload_inpsi (op, op[2], &len); break; + case ADJUST_LEN_RELOAD_IN32: output_reload_insisf (op, op[2], &len); break; + + case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break; + + case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break; + case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break; + + case ADJUST_LEN_MOV8: output_movqi (insn, op, &len); break; + case ADJUST_LEN_MOV16: output_movhi (insn, op, &len); break; + case ADJUST_LEN_MOV24: avr_out_movpsi (insn, op, &len); break; + case ADJUST_LEN_MOV32: output_movsisf (insn, op, &len); break; + case ADJUST_LEN_MOVMEM: avr_out_movmem (insn, op, &len); break; + case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; + case ADJUST_LEN_LPM: avr_out_lpm (insn, op, &len); break; + + case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break; + case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break; + case ADJUST_LEN_ROUND: avr_out_round (insn, op, &len); break; + + case ADJUST_LEN_TSTHI: avr_out_tsthi (insn, op, &len); break; + case ADJUST_LEN_TSTPSI: avr_out_tstpsi (insn, op, &len); break; + case ADJUST_LEN_TSTSI: avr_out_tstsi (insn, op, &len); break; + case ADJUST_LEN_COMPARE: avr_out_compare (insn, op, &len); break; + case ADJUST_LEN_COMPARE64: avr_out_compare64 (insn, op, &len); break; + + case ADJUST_LEN_LSHRQI: lshrqi3_out (insn, op, &len); break; + case ADJUST_LEN_LSHRHI: lshrhi3_out (insn, op, &len); break; + case ADJUST_LEN_LSHRSI: lshrsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHRQI: ashrqi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHRHI: ashrhi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHRSI: ashrsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHLQI: ashlqi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHLHI: ashlhi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHLSI: ashlsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHLPSI: avr_out_ashlpsi3 (insn, op, &len); break; + case ADJUST_LEN_ASHRPSI: avr_out_ashrpsi3 (insn, op, &len); break; + case ADJUST_LEN_LSHRPSI: avr_out_lshrpsi3 (insn, op, &len); break; + + case ADJUST_LEN_CALL: len = AVR_HAVE_JMP_CALL ? 2 : 1; break; + + case ADJUST_LEN_INSERT_BITS: avr_out_insert_bits (op, &len); break; + + default: + gcc_unreachable(); + } + + return len; +} + +/* Return nonzero if register REG dead after INSN. */ + +int +reg_unused_after (rtx insn, rtx reg) +{ + return (dead_or_set_p (insn, reg) + || (REG_P(reg) && _reg_unused_after (insn, reg))); +} + +/* Return nonzero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ + +int +_reg_unused_after (rtx insn, rtx reg) +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while ((insn = NEXT_INSN (insn))) + { + rtx set; + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. 
*/ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (!INSN_P (insn)) + continue; + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and an insn in a delay slot that uses the register. In this case, + we must return 0. */ + else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + { + int i; + int retval = 0; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx this_insn = XVECEXP (PATTERN (insn), 0, i); + rtx set = single_set (this_insn); + + if (CALL_P (this_insn)) + code = CALL_INSN; + else if (JUMP_P (this_insn)) + { + if (INSN_ANNULLED_BRANCH_P (this_insn)) + return 0; + code = JUMP_INSN; + } + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + { + if (GET_CODE (SET_DEST (set)) != MEM) + retval = 1; + else + return 0; + } + if (set == 0 + && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) + return 0; + } + if (retval == 1) + return 1; + else if (code == JUMP_INSN) + return 0; + } + + if (code == CALL_INSN) + { + rtx tem; + for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1)) + if (GET_CODE (XEXP (tem, 0)) == USE + && REG_P (XEXP (XEXP (tem, 0), 0)) + && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0))) + return 0; + if (call_used_regs[REGNO (reg)]) + return 1; + } + + set = single_set (insn); + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return GET_CODE (SET_DEST (set)) != MEM; + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + } + return 1; +} + + +/* Implement `TARGET_ASM_INTEGER'. */ +/* Target hook for assembling integer objects. The AVR version needs + special handling for references to certain labels. */ + +static bool +avr_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == POINTER_SIZE / BITS_PER_UNIT && aligned_p + && text_segment_operand (x, VOIDmode)) + { + fputs ("\t.word\tgs(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + + return true; + } + else if (GET_MODE (x) == PSImode) + { + /* This needs binutils 2.23+, see PR binutils/13503 */ + + fputs ("\t.byte\tlo8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + fputs ("\t.byte\thi8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + fputs ("\t.byte\thh8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + return true; + } + else if (CONST_FIXED_P (x)) + { + unsigned n; + + /* varasm fails to handle big fixed modes that don't fit in hwi. */ + + for (n = 0; n < size; n++) + { + rtx xn = simplify_gen_subreg (QImode, x, GET_MODE (x), n); + default_assemble_integer (xn, 1, aligned_p); + } + + return true; + } + + return default_assemble_integer (x, size, aligned_p); +} + + +/* Implement `TARGET_CLASS_LIKELY_SPILLED_P'. */ +/* Return value is nonzero if pseudos that have been + assigned to registers of class CLASS would likely be spilled + because registers of CLASS are needed for spill registers. 
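+ With only 32 byte-sized registers and several small dedicated classes
+ such as LD_REGS and the pointer classes, AVR answers "likely spilled"
+ for every class except ALL_REGS and ADDW_REGS, as the implementation
+ below shows.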
*/ + +static bool +avr_class_likely_spilled_p (reg_class_t c) +{ + return (c != ALL_REGS && c != ADDW_REGS); +} + + +/* Valid attributes: + progmem - Put data to program memory. + signal - Make a function to be hardware interrupt. + After function prologue interrupts remain disabled. + interrupt - Make a function to be hardware interrupt. Before function + prologue interrupts are enabled by means of SEI. + naked - Don't generate function prologue/epilogue and RET + instruction. */ + +/* Handle a "progmem" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +avr_handle_progmem_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + { + /* This is really a decl attribute, not a type attribute, + but try to handle it for GCC 3.0 backwards compatibility. */ + + tree type = TREE_TYPE (*node); + tree attr = tree_cons (name, args, TYPE_ATTRIBUTES (type)); + tree newtype = build_type_attribute_variant (type, attr); + + TYPE_MAIN_VARIANT (newtype) = TYPE_MAIN_VARIANT (type); + TREE_TYPE (*node) = newtype; + *no_add_attrs = true; + } + else if (TREE_STATIC (*node) || DECL_EXTERNAL (*node)) + { + *no_add_attrs = false; + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + + return NULL_TREE; +} + +/* Handle an attribute requiring a FUNCTION_DECL; arguments as in + struct attribute_spec.handler. */ + +static tree +avr_handle_fndecl_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static tree +avr_handle_fntype_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + + +/* AVR attributes. */ +static const struct attribute_spec +avr_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "progmem", 0, 0, false, false, false, avr_handle_progmem_attribute, + false }, + { "signal", 0, 0, true, false, false, avr_handle_fndecl_attribute, + false }, + { "interrupt", 0, 0, true, false, false, avr_handle_fndecl_attribute, + false }, + { "naked", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { "OS_task", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { "OS_main", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + + +/* Look if DECL shall be placed in program memory space by + means of attribute `progmem' or some address-space qualifier. + Return non-zero if DECL is data that must end up in Flash and + zero if the data lives in RAM (.bss, .data, .rodata, ...). 
+ + Return 2 if DECL is located in 24-bit flash address-space + Return 1 if DECL is located in 16-bit flash address-space + Return -1 if attribute `progmem' occurs in DECL or ATTRIBUTES + Return 0 otherwise */ + +int +avr_progmem_p (tree decl, tree attributes) +{ + tree a; + + if (TREE_CODE (decl) != VAR_DECL) + return 0; + + if (avr_decl_memx_p (decl)) + return 2; + + if (avr_decl_flash_p (decl)) + return 1; + + if (NULL_TREE + != lookup_attribute ("progmem", attributes)) + return -1; + + a = decl; + + do + a = TREE_TYPE(a); + while (TREE_CODE (a) == ARRAY_TYPE); + + if (a == error_mark_node) + return 0; + + if (NULL_TREE != lookup_attribute ("progmem", TYPE_ATTRIBUTES (a))) + return -1; + + return 0; +} + + +/* Scan type TYP for pointer references to address space ASn. + Return ADDR_SPACE_GENERIC (i.e. 0) if all pointers targeting + the AS are also declared to be CONST. + Otherwise, return the respective address space, i.e. a value != 0. */ + +static addr_space_t +avr_nonconst_pointer_addrspace (tree typ) +{ + while (ARRAY_TYPE == TREE_CODE (typ)) + typ = TREE_TYPE (typ); + + if (POINTER_TYPE_P (typ)) + { + addr_space_t as; + tree target = TREE_TYPE (typ); + + /* Pointer to function: Test the function's return type. */ + + if (FUNCTION_TYPE == TREE_CODE (target)) + return avr_nonconst_pointer_addrspace (TREE_TYPE (target)); + + /* "Ordinary" pointers... */ + + while (TREE_CODE (target) == ARRAY_TYPE) + target = TREE_TYPE (target); + + /* Pointers to non-generic address space must be const. + Refuse address spaces outside the device's flash. */ + + as = TYPE_ADDR_SPACE (target); + + if (!ADDR_SPACE_GENERIC_P (as) + && (!TYPE_READONLY (target) + || avr_addrspace[as].segment >= avr_current_device->n_flash)) + { + return as; + } + + /* Scan pointer's target type. */ + + return avr_nonconst_pointer_addrspace (target); + } + + return ADDR_SPACE_GENERIC; +} + + +/* Sanity check NODE so that all pointers targeting non-generic address spaces + go along with CONST qualifier. Writing to these address spaces should + be detected and complained about as early as possible. 
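+ For instance, "const __flash char *p" is accepted, while
+ "__flash char *p" (target type not const) gets the "must be const"
+ diagnostic emitted below, and an address space that does not fit into
+ the device's flash gets the "beyond flash" diagnostic instead.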
*/ + +static bool +avr_pgm_check_var_decl (tree node) +{ + const char *reason = NULL; + + addr_space_t as = ADDR_SPACE_GENERIC; + + gcc_assert (as == 0); + + if (avr_log.progmem) + avr_edump ("%?: %t\n", node); + + switch (TREE_CODE (node)) + { + default: + break; + + case VAR_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "variable"; + break; + + case PARM_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "function parameter"; + break; + + case FIELD_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "structure field"; + break; + + case FUNCTION_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (TREE_TYPE (node))), + as) + reason = "return type of function"; + break; + + case POINTER_TYPE: + if (as = avr_nonconst_pointer_addrspace (node), as) + reason = "pointer"; + break; + } + + if (reason) + { + if (avr_addrspace[as].segment >= avr_current_device->n_flash) + { + if (TYPE_P (node)) + error ("%qT uses address space %qs beyond flash of %qs", + node, avr_addrspace[as].name, avr_current_device->name); + else + error ("%s %q+D uses address space %qs beyond flash of %qs", + reason, node, avr_addrspace[as].name, + avr_current_device->name); + } + else + { + if (TYPE_P (node)) + error ("pointer targeting address space %qs must be const in %qT", + avr_addrspace[as].name, node); + else + error ("pointer targeting address space %qs must be const" + " in %s %q+D", + avr_addrspace[as].name, reason, node); + } + } + + return reason == NULL; +} + + +/* Add the section attribute if the variable is in progmem. */ + +static void +avr_insert_attributes (tree node, tree *attributes) +{ + avr_pgm_check_var_decl (node); + + if (TREE_CODE (node) == VAR_DECL + && (TREE_STATIC (node) || DECL_EXTERNAL (node)) + && avr_progmem_p (node, *attributes)) + { + addr_space_t as; + tree node0 = node; + + /* For C++, we have to peel arrays in order to get correct + determination of readonlyness. */ + + do + node0 = TREE_TYPE (node0); + while (TREE_CODE (node0) == ARRAY_TYPE); + + if (error_mark_node == node0) + return; + + as = TYPE_ADDR_SPACE (TREE_TYPE (node)); + + if (avr_addrspace[as].segment >= avr_current_device->n_flash) + { + error ("variable %q+D located in address space %qs" + " beyond flash of %qs", + node, avr_addrspace[as].name, avr_current_device->name); + } + + if (!TYPE_READONLY (node0) + && !TREE_READONLY (node)) + { + const char *reason = "__attribute__((progmem))"; + + if (!ADDR_SPACE_GENERIC_P (as)) + reason = avr_addrspace[as].name; + + if (avr_log.progmem) + avr_edump ("\n%?: %t\n%t\n", node, node0); + + error ("variable %q+D must be const in order to be put into" + " read-only section by means of %qs", node, reason); + } + } +} + + +/* Implement `ASM_OUTPUT_ALIGNED_DECL_LOCAL'. */ +/* Implement `ASM_OUTPUT_ALIGNED_DECL_COMMON'. */ +/* Track need of __do_clear_bss. */ + +void +avr_asm_output_aligned_decl_common (FILE * stream, + const_tree decl ATTRIBUTE_UNUSED, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, bool local_p) +{ + /* __gnu_lto_v1 etc. are just markers for the linker injected by toplev.c. + There is no need to trigger __do_clear_bss code for them. */ + + if (!STR_PREFIX_P (name, "__gnu_lto")) + avr_need_clear_bss_p = true; + + if (local_p) + ASM_OUTPUT_ALIGNED_LOCAL (stream, name, size, align); + else + ASM_OUTPUT_ALIGNED_COMMON (stream, name, size, align); +} + + +/* Unnamed section callback for data_section + to track need of __do_copy_data. 
*/ + +static void +avr_output_data_section_asm_op (const void *data) +{ + avr_need_copy_data_p = true; + + /* Dispatch to default. */ + output_section_asm_op (data); +} + + +/* Unnamed section callback for bss_section + to track need of __do_clear_bss. */ + +static void +avr_output_bss_section_asm_op (const void *data) +{ + avr_need_clear_bss_p = true; + + /* Dispatch to default. */ + output_section_asm_op (data); +} + + +/* Unnamed section callback for progmem*.data sections. */ + +static void +avr_output_progmem_section_asm_op (const void *data) +{ + fprintf (asm_out_file, "\t.section\t%s,\"a\",@progbits\n", + (const char*) data); +} + + +/* Implement `TARGET_ASM_INIT_SECTIONS'. */ + +static void +avr_asm_init_sections (void) +{ + /* Set up a section for jump tables. Alignment is handled by + ASM_OUTPUT_BEFORE_CASE_LABEL. */ + + if (AVR_HAVE_JMP_CALL) + { + progmem_swtable_section + = get_unnamed_section (0, output_section_asm_op, + "\t.section\t.progmem.gcc_sw_table" + ",\"a\",@progbits"); + } + else + { + progmem_swtable_section + = get_unnamed_section (SECTION_CODE, output_section_asm_op, + "\t.section\t.progmem.gcc_sw_table" + ",\"ax\",@progbits"); + } + + /* Override section callbacks to keep track of `avr_need_clear_bss_p' + resp. `avr_need_copy_data_p'. */ + + readonly_data_section->unnamed.callback = avr_output_data_section_asm_op; + data_section->unnamed.callback = avr_output_data_section_asm_op; + bss_section->unnamed.callback = avr_output_bss_section_asm_op; +} + + +/* Implement `TARGET_ASM_FUNCTION_RODATA_SECTION'. */ + +static section* +avr_asm_function_rodata_section (tree decl) +{ + /* If a function is unused and optimized out by -ffunction-sections + and --gc-sections, ensure that the same will happen for its jump + tables by putting them into individual sections. */ + + unsigned int flags; + section * frodata; + + /* Get the frodata section from the default function in varasm.c + but treat function-associated data-like jump tables as code + rather than as user defined data. AVR has no constant pools. */ + { + int fdata = flag_data_sections; + + flag_data_sections = flag_function_sections; + frodata = default_function_rodata_section (decl); + flag_data_sections = fdata; + flags = frodata->common.flags; + } + + if (frodata != readonly_data_section + && flags & SECTION_NAMED) + { + /* Adjust section flags and replace section name prefix. */ + + unsigned int i; + + static const char* const prefix[] = + { + ".rodata", ".progmem.gcc_sw_table", + ".gnu.linkonce.r.", ".gnu.linkonce.t." + }; + + for (i = 0; i < sizeof (prefix) / sizeof (*prefix); i += 2) + { + const char * old_prefix = prefix[i]; + const char * new_prefix = prefix[i+1]; + const char * name = frodata->named.name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *rname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + flags &= ~SECTION_CODE; + flags |= AVR_HAVE_JMP_CALL ? 0 : SECTION_CODE; + + return get_section (rname, flags, frodata->named.decl); + } + } + } + + return progmem_swtable_section; +} + + +/* Implement `TARGET_ASM_NAMED_SECTION'. */ +/* Track need of __do_clear_bss, __do_copy_data for named sections. 
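+ That is, a user-named section starting with ".data", ".rodata" or
+ ".gnu.linkonce.d" pulls in __do_copy_data, and one starting with ".bss"
+ pulls in __do_clear_bss, mirroring the prefix checks below.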
*/ + +static void +avr_asm_named_section (const char *name, unsigned int flags, tree decl) +{ + if (flags & AVR_SECTION_PROGMEM) + { + addr_space_t as = (flags & AVR_SECTION_PROGMEM) / SECTION_MACH_DEP; + const char *old_prefix = ".rodata"; + const char *new_prefix = avr_addrspace[as].section_name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *sname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + default_elf_asm_named_section (sname, flags, decl); + return; + } + + default_elf_asm_named_section (new_prefix, flags, decl); + return; + } + + if (!avr_need_copy_data_p) + avr_need_copy_data_p = (STR_PREFIX_P (name, ".data") + || STR_PREFIX_P (name, ".rodata") + || STR_PREFIX_P (name, ".gnu.linkonce.d")); + + if (!avr_need_clear_bss_p) + avr_need_clear_bss_p = STR_PREFIX_P (name, ".bss"); + + default_elf_asm_named_section (name, flags, decl); +} + + +/* Implement `TARGET_SECTION_TYPE_FLAGS'. */ + +static unsigned int +avr_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (STR_PREFIX_P (name, ".noinit")) + { + if (decl && TREE_CODE (decl) == VAR_DECL + && DECL_INITIAL (decl) == NULL_TREE) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in the " + ".noinit section"); + } + + if (decl && DECL_P (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (decl)); + + /* Attribute progmem puts data in generic address space. + Set section flags as if it was in __flash to get the right + section prefix in the remainder. */ + + if (ADDR_SPACE_GENERIC_P (as)) + as = ADDR_SPACE_FLASH; + + flags |= as * SECTION_MACH_DEP; + flags &= ~SECTION_WRITE; + flags &= ~SECTION_BSS; + } + + return flags; +} + + +/* Implement `TARGET_ENCODE_SECTION_INFO'. */ + +static void +avr_encode_section_info (tree decl, rtx rtl, int new_decl_p) +{ + /* In avr_handle_progmem_attribute, DECL_INITIAL is not yet + readily available, see PR34734. So we postpone the warning + about uninitialized data in program memory section until here. */ + + if (new_decl_p + && decl && DECL_P (decl) + && NULL_TREE == DECL_INITIAL (decl) + && !DECL_EXTERNAL (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + warning (OPT_Wuninitialized, + "uninitialized variable %q+D put into " + "program memory area", decl); + } + + default_encode_section_info (decl, rtl, new_decl_p); + + if (decl && DECL_P (decl) + && TREE_CODE (decl) != FUNCTION_DECL + && MEM_P (rtl) + && SYMBOL_REF == GET_CODE (XEXP (rtl, 0))) + { + rtx sym = XEXP (rtl, 0); + tree type = TREE_TYPE (decl); + if (type == error_mark_node) + return; + addr_space_t as = TYPE_ADDR_SPACE (type); + + /* PSTR strings are in generic space but located in flash: + patch address space. 
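+ (avr-libc's PSTR, for example, creates a static const char[] carrying
+ the progmem attribute; avr_progmem_p returns -1 for it, so the symbol
+ is retagged as ADDR_SPACE_FLASH here although its C type lives in the
+ generic space.)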
*/ + + if (-1 == avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + as = ADDR_SPACE_FLASH; + + AVR_SYMBOL_SET_ADDR_SPACE (sym, as); + } +} + + +/* Implement `TARGET_ASM_SELECT_SECTION' */ + +static section * +avr_asm_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) +{ + section * sect = default_elf_select_section (decl, reloc, align); + + if (decl && DECL_P (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (decl)); + + /* __progmem__ goes in generic space but shall be allocated to + .progmem.data */ + + if (ADDR_SPACE_GENERIC_P (as)) + as = ADDR_SPACE_FLASH; + + if (sect->common.flags & SECTION_NAMED) + { + const char * name = sect->named.name; + const char * old_prefix = ".rodata"; + const char * new_prefix = avr_addrspace[as].section_name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *sname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + return get_section (sname, sect->common.flags, sect->named.decl); + } + } + + if (!progmem_section[as]) + { + progmem_section[as] + = get_unnamed_section (0, avr_output_progmem_section_asm_op, + avr_addrspace[as].section_name); + } + + return progmem_section[as]; + } + + return sect; +} + +/* Implement `TARGET_ASM_FILE_START'. */ +/* Outputs some text at the start of each assembler file. */ + +static void +avr_file_start (void) +{ + int sfr_offset = avr_current_arch->sfr_offset; + + if (avr_current_arch->asm_only) + error ("MCU %qs supported for assembler only", avr_current_device->name); + + default_file_start (); + + /* Print I/O addresses of some SFRs used with IN and OUT. */ + + if (AVR_HAVE_SPH) + fprintf (asm_out_file, "__SP_H__ = 0x%02x\n", avr_addr.sp_h - sfr_offset); + + fprintf (asm_out_file, "__SP_L__ = 0x%02x\n", avr_addr.sp_l - sfr_offset); + fprintf (asm_out_file, "__SREG__ = 0x%02x\n", avr_addr.sreg - sfr_offset); + if (AVR_HAVE_RAMPZ) + fprintf (asm_out_file, "__RAMPZ__ = 0x%02x\n", avr_addr.rampz - sfr_offset); + if (AVR_HAVE_RAMPY) + fprintf (asm_out_file, "__RAMPY__ = 0x%02x\n", avr_addr.rampy - sfr_offset); + if (AVR_HAVE_RAMPX) + fprintf (asm_out_file, "__RAMPX__ = 0x%02x\n", avr_addr.rampx - sfr_offset); + if (AVR_HAVE_RAMPD) + fprintf (asm_out_file, "__RAMPD__ = 0x%02x\n", avr_addr.rampd - sfr_offset); + if (AVR_XMEGA) + fprintf (asm_out_file, "__CCP__ = 0x%02x\n", avr_addr.ccp - sfr_offset); + fprintf (asm_out_file, "__tmp_reg__ = %d\n", TMP_REGNO); + fprintf (asm_out_file, "__zero_reg__ = %d\n", ZERO_REGNO); +} + + +/* Implement `TARGET_ASM_FILE_END'. */ +/* Outputs to the stdio stream FILE some + appropriate text to go at the end of an assembler file. */ + +static void +avr_file_end (void) +{ + /* Output these only if there is anything in the + .data* / .rodata* / .gnu.linkonce.* resp. .bss* or COMMON + input section(s) - some code size can be saved by not + linking in the initialization code from libgcc if resp. + sections are empty, see PR18145. */ + + if (avr_need_copy_data_p) + fputs (".global __do_copy_data\n", asm_out_file); + + if (avr_need_clear_bss_p) + fputs (".global __do_clear_bss\n", asm_out_file); +} + + +/* Worker function for `ADJUST_REG_ALLOC_ORDER'. */ +/* Choose the order in which to allocate hard registers for + pseudo-registers local to a basic block. + + Store the desired register order in the array `reg_alloc_order'. + Element 0 should be the register to allocate first; element 1, the + next register; and so on. 
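+ The three tables below differ only in how the upper, immediate-capable
+ registers are tried first: order_0 prefers r24/r25 before r18..r23,
+ order_1 walks r18..r25 upwards, order_2 walks r25 down to r18.  All of
+ them then fall back to the pointer registers Z, X and Y, work down
+ through r17..r2, and keep the tmp/zero registers and the special
+ entries 32..35 for last.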
*/ + +void +avr_adjust_reg_alloc_order (void) +{ + unsigned int i; + static const int order_0[] = + { + 24, 25, + 18, 19, 20, 21, 22, 23, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 0, 1, + 32, 33, 34, 35 + }; + static const int order_1[] = + { + 18, 19, 20, 21, 22, 23, 24, 25, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 0, 1, + 32, 33, 34, 35 + }; + static const int order_2[] = + { + 25, 24, 23, 22, 21, 20, 19, 18, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0, + 32, 33, 34, 35 + }; + + const int *order = (TARGET_ORDER_1 ? order_1 : + TARGET_ORDER_2 ? order_2 : + order_0); + for (i = 0; i < ARRAY_SIZE (order_0); ++i) + reg_alloc_order[i] = order[i]; +} + + +/* Implement `TARGET_REGISTER_MOVE_COST' */ + +static int +avr_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + return (from == STACK_REG ? 6 + : to == STACK_REG ? 12 + : 2); +} + + +/* Implement `TARGET_MEMORY_MOVE_COST' */ + +static int +avr_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return (mode == QImode ? 2 + : mode == HImode ? 4 + : mode == SImode ? 8 + : mode == SFmode ? 8 + : 16); +} + + +/* Mutually recursive subroutine of avr_rtx_cost for calculating the + cost of an RTX operand given its context. X is the rtx of the + operand, MODE is its mode, and OUTER is the rtx_code of this + operand's parent operator. */ + +static int +avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer, + int opno, bool speed) +{ + enum rtx_code code = GET_CODE (x); + int total; + + switch (code) + { + case REG: + case SUBREG: + return 0; + + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + return COSTS_N_INSNS (GET_MODE_SIZE (mode)); + + default: + break; + } + + total = 0; + avr_rtx_costs (x, code, outer, opno, &total, speed); + return total; +} + +/* Worker function for AVR backend's rtx_cost function. + X is rtx expression whose cost is to be calculated. + Return true if the complete cost has been computed. + Return false if subexpressions should be scanned. + In either case, *TOTAL contains the cost result. */ + +static bool +avr_rtx_costs_1 (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, int *total, bool speed) +{ + enum rtx_code code = (enum rtx_code) codearg; + enum machine_mode mode = GET_MODE (x); + HOST_WIDE_INT val; + + switch (code) + { + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + case LABEL_REF: + /* Immediate constants are as cheap as registers. 
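+ They are costed as zero here when they form the expression being priced;
+ avr_operand_rtx_cost above, by contrast, charges one instruction per
+ byte for a constant used as an operand, since it must be loaded into a
+ register first.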
*/ + *total = 0; + return true; + + case MEM: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + return true; + + case NEG: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + case HImode: + case PSImode: + case SImode: + *total = COSTS_N_INSNS (2 * GET_MODE_SIZE (mode) - 1); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ABS: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case NOT: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ZERO_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case SIGN_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + 2 + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case PLUS: + switch (mode) + { + case QImode: + if (AVR_HAVE_MUL + && MULT == GET_CODE (XEXP (x, 0)) + && register_operand (XEXP (x, 1), QImode)) + { + /* multiply-add */ + *total = COSTS_N_INSNS (speed ? 4 : 3); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + break; + + case HImode: + if (AVR_HAVE_MUL + && (MULT == GET_CODE (XEXP (x, 0)) + || ASHIFT == GET_CODE (XEXP (x, 0))) + && register_operand (XEXP (x, 1), HImode) + && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)) + || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)))) + { + /* multiply-add */ + *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (2); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (3); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (3); + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (4); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case MINUS: + if (AVR_HAVE_MUL + && QImode == mode + && register_operand (XEXP (x, 0), QImode) + && MULT == GET_CODE (XEXP (x, 1))) + { + /* multiply-sub */ + *total = COSTS_N_INSNS (speed ? 
4 : 3); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + if (AVR_HAVE_MUL + && HImode == mode + && register_operand (XEXP (x, 0), HImode) + && (MULT == GET_CODE (XEXP (x, 1)) + || ASHIFT == GET_CODE (XEXP (x, 1))) + && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)) + || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)))) + { + /* multiply-sub */ + *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + /* FALLTHRU */ + case AND: + case IOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case XOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case MULT: + switch (mode) + { + case QImode: + if (AVR_HAVE_MUL) + *total = COSTS_N_INSNS (!speed ? 3 : 4); + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + case HImode: + if (AVR_HAVE_MUL) + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0; + bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1; + + if (ex0 + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + if (ex0 + && register_operand (op1, HImode)) + { + *total = COSTS_N_INSNS (!speed ? 5 : 8); + return true; + } + else if (ex0 || ex1) + { + *total = COSTS_N_INSNS (!speed ? 3 : 5); + return true; + } + else if (register_operand (op0, HImode) + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 9); + return true; + } + else + *total = COSTS_N_INSNS (!speed ? 7 : 10); + } + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + case PSImode: + if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + *total = 10; + break; + + case SImode: + if (AVR_HAVE_MUL) + { + if (!speed) + { + /* Add some additional costs besides CALL like moves etc. */ + + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + else + { + /* Just a rough estimate. Even with -O2 we don't want bulky + code expanded inline. */ + + *total = COSTS_N_INSNS (25); + } + } + else + { + if (speed) + *total = COSTS_N_INSNS (300); + else + /* Add some additional costs besides CALL like moves etc. */ + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + + return true; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + *total = COSTS_N_INSNS (15 * GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + /* For div/mod with const-int divisor we have at least the cost of + loading the divisor. 
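+ As a worked example, a speed-optimised SImode division is costed as
+ 15 * 4 = 60 instruction equivalents for the library call, plus 4 for
+ loading a 32-bit constant divisor, plus the flat 2-instruction penalty
+ added below for register shuffling.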
*/ + if (CONST_INT_P (XEXP (x, 1))) + *total += COSTS_N_INSNS (GET_MODE_SIZE (mode)); + /* Add some overall penaly for clobbering and moving around registers */ + *total += COSTS_N_INSNS (2); + return true; + + case ROTATE: + switch (mode) + { + case QImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 4) + *total = COSTS_N_INSNS (1); + + break; + + case HImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 8) + *total = COSTS_N_INSNS (3); + + break; + + case SImode: + if (CONST_INT_P (XEXP (x, 1))) + switch (INTVAL (XEXP (x, 1))) + { + case 8: + case 24: + *total = COSTS_N_INSNS (5); + break; + case 16: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 6); + break; + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ASHIFT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (AVR_HAVE_MUL) + { + if (const_2_to_7_operand (XEXP (x, 1), HImode) + && (SIGN_EXTEND == GET_CODE (XEXP (x, 0)) + || ZERO_EXTEND == GET_CODE (XEXP (x, 0)))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + } + + if (const1_rtx == (XEXP (x, 1)) + && SIGN_EXTEND == GET_CODE (XEXP (x, 0))) + { + *total = COSTS_N_INSNS (2); + return true; + } + + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 3: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + case 12: + *total = COSTS_N_INSNS (5); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + case 5: + *total = COSTS_N_INSNS (!speed ? 5 : 10); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (3); + break; + case 23: + *total = COSTS_N_INSNS (5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 24: + *total = COSTS_N_INSNS (3); + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 
7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ASHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 6) + *total = COSTS_N_INSNS (4); + else if (val == 7) + *total = COSTS_N_INSNS (2); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (2); + break; + case 15: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 7: + case 8: + case 9: + *total = COSTS_N_INSNS (4); + break; + case 10: + case 14: + *total = COSTS_N_INSNS (5); + break; + case 11: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 12: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 6: + case 13: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (3); + break; + case 16: + case 8: + *total = COSTS_N_INSNS (5); + break; + case 23: + *total = COSTS_N_INSNS (4); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 31: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case LSHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 
5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + *total = COSTS_N_INSNS (5); + break; + case 3: + case 12: + case 13: + case 14: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 5: + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (3); + break; + case 23: + *total = COSTS_N_INSNS (5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case COMPARE: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + break; + + case HImode: + *total = COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (1); + break; + + case PSImode: + *total = COSTS_N_INSNS (3); + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (2); + break; + + case SImode: + *total = COSTS_N_INSNS (4); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (3); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case TRUNCATE: + if (AVR_HAVE_MUL + && LSHIFTRT == GET_CODE (XEXP (x, 0)) + && MULT == GET_CODE (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + if (QImode == mode || HImode == mode) + { + *total = COSTS_N_INSNS (2); + return true; + } + } + break; + + default: + break; + } + return false; +} + + +/* Implement `TARGET_RTX_COSTS'. */ + +static bool +avr_rtx_costs (rtx x, int codearg, int outer_code, + int opno, int *total, bool speed) +{ + bool done = avr_rtx_costs_1 (x, codearg, outer_code, + opno, total, speed); + + if (avr_log.rtx_costs) + { + avr_edump ("\n%?=%b (%s) total=%d, outer=%C:\n%r\n", + done, speed ? 
"speed" : "size", *total, outer_code, x); + } + + return done; +} + + +/* Implement `TARGET_ADDRESS_COST'. */ + +static int +avr_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + int cost = 4; + + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (REG_P (XEXP (x, 0)) + || GET_CODE (XEXP (x, 0)) == SUBREG)) + { + if (INTVAL (XEXP (x, 1)) >= 61) + cost = 18; + } + else if (CONSTANT_ADDRESS_P (x)) + { + if (optimize > 0 + && io_address_operand (x, QImode)) + cost = 2; + } + + if (avr_log.address_cost) + avr_edump ("\n%?: %d = %r\n", cost, x); + + return cost; +} + +/* Test for extra memory constraint 'Q'. + It's a memory address based on Y or Z pointer with valid displacement. */ + +int +extra_constraint_Q (rtx x) +{ + int ok = 0; + + if (GET_CODE (XEXP (x,0)) == PLUS + && REG_P (XEXP (XEXP (x,0), 0)) + && GET_CODE (XEXP (XEXP (x,0), 1)) == CONST_INT + && (INTVAL (XEXP (XEXP (x,0), 1)) + <= MAX_LD_OFFSET (GET_MODE (x)))) + { + rtx xx = XEXP (XEXP (x,0), 0); + int regno = REGNO (xx); + + ok = (/* allocate pseudos */ + regno >= FIRST_PSEUDO_REGISTER + /* strictly check */ + || regno == REG_Z || regno == REG_Y + /* XXX frame & arg pointer checks */ + || xx == frame_pointer_rtx + || xx == arg_pointer_rtx); + + if (avr_log.constraints) + avr_edump ("\n%?=%d reload_completed=%d reload_in_progress=%d\n %r\n", + ok, reload_completed, reload_in_progress, x); + } + + return ok; +} + +/* Convert condition code CONDITION to the valid AVR condition code. */ + +RTX_CODE +avr_normalize_condition (RTX_CODE condition) +{ + switch (condition) + { + case GT: + return GE; + case GTU: + return GEU; + case LE: + return LT; + case LEU: + return LTU; + default: + gcc_unreachable (); + } +} + +/* Helper function for `avr_reorg'. */ + +static rtx +avr_compare_pattern (rtx insn) +{ + rtx pattern = single_set (insn); + + if (pattern + && NONJUMP_INSN_P (insn) + && SET_DEST (pattern) == cc0_rtx + && GET_CODE (SET_SRC (pattern)) == COMPARE) + { + enum machine_mode mode0 = GET_MODE (XEXP (SET_SRC (pattern), 0)); + enum machine_mode mode1 = GET_MODE (XEXP (SET_SRC (pattern), 1)); + + /* The 64-bit comparisons have fixed operands ACC_A and ACC_B. + They must not be swapped, thus skip them. */ + + if ((mode0 == VOIDmode || GET_MODE_SIZE (mode0) <= 4) + && (mode1 == VOIDmode || GET_MODE_SIZE (mode1) <= 4)) + return pattern; + } + + return NULL_RTX; +} + +/* Helper function for `avr_reorg'. */ + +/* Expansion of switch/case decision trees leads to code like + + cc0 = compare (Reg, Num) + if (cc0 == 0) + goto L1 + + cc0 = compare (Reg, Num) + if (cc0 > 0) + goto L2 + + The second comparison is superfluous and can be deleted. + The second jump condition can be transformed from a + "difficult" one to a "simple" one because "cc0 > 0" and + "cc0 >= 0" will have the same effect here. + + This function relies on the way switch/case is being expaned + as binary decision tree. For example code see PR 49903. + + Return TRUE if optimization performed. + Return FALSE if nothing changed. + + INSN1 is a comparison, i.e. avr_compare_pattern != 0. + + We don't want to do this in text peephole because it is + tedious to work out jump offsets there and the second comparison + might have been transormed by `avr_reorg'. + + RTL peephole won't do because peephole2 does not scan across + basic blocks. 
*/ + +static bool +avr_reorg_remove_redundant_compare (rtx insn1) +{ + rtx comp1, ifelse1, xcond1, branch1; + rtx comp2, ifelse2, xcond2, branch2, insn2; + enum rtx_code code; + rtx jump, target, cond; + + /* Look out for: compare1 - branch1 - compare2 - branch2 */ + + branch1 = next_nonnote_nondebug_insn (insn1); + if (!branch1 || !JUMP_P (branch1)) + return false; + + insn2 = next_nonnote_nondebug_insn (branch1); + if (!insn2 || !avr_compare_pattern (insn2)) + return false; + + branch2 = next_nonnote_nondebug_insn (insn2); + if (!branch2 || !JUMP_P (branch2)) + return false; + + comp1 = avr_compare_pattern (insn1); + comp2 = avr_compare_pattern (insn2); + xcond1 = single_set (branch1); + xcond2 = single_set (branch2); + + if (!comp1 || !comp2 + || !rtx_equal_p (comp1, comp2) + || !xcond1 || SET_DEST (xcond1) != pc_rtx + || !xcond2 || SET_DEST (xcond2) != pc_rtx + || IF_THEN_ELSE != GET_CODE (SET_SRC (xcond1)) + || IF_THEN_ELSE != GET_CODE (SET_SRC (xcond2))) + { + return false; + } + + comp1 = SET_SRC (comp1); + ifelse1 = SET_SRC (xcond1); + ifelse2 = SET_SRC (xcond2); + + /* comp is COMPARE now and ifelse is IF_THEN_ELSE. */ + + if (EQ != GET_CODE (XEXP (ifelse1, 0)) + || !REG_P (XEXP (comp1, 0)) + || !CONST_INT_P (XEXP (comp1, 1)) + || XEXP (ifelse1, 2) != pc_rtx + || XEXP (ifelse2, 2) != pc_rtx + || LABEL_REF != GET_CODE (XEXP (ifelse1, 1)) + || LABEL_REF != GET_CODE (XEXP (ifelse2, 1)) + || !COMPARISON_P (XEXP (ifelse2, 0)) + || cc0_rtx != XEXP (XEXP (ifelse1, 0), 0) + || cc0_rtx != XEXP (XEXP (ifelse2, 0), 0) + || const0_rtx != XEXP (XEXP (ifelse1, 0), 1) + || const0_rtx != XEXP (XEXP (ifelse2, 0), 1)) + { + return false; + } + + /* We filtered the insn sequence to look like + + (set (cc0) + (compare (reg:M N) + (const_int VAL))) + (set (pc) + (if_then_else (eq (cc0) + (const_int 0)) + (label_ref L1) + (pc))) + + (set (cc0) + (compare (reg:M N) + (const_int VAL))) + (set (pc) + (if_then_else (CODE (cc0) + (const_int 0)) + (label_ref L2) + (pc))) + */ + + code = GET_CODE (XEXP (ifelse2, 0)); + + /* Map GT/GTU to GE/GEU which is easier for AVR. + The first two instructions compare/branch on EQ + so we may replace the difficult + + if (x == VAL) goto L1; + if (x > VAL) goto L2; + + with easy + + if (x == VAL) goto L1; + if (x >= VAL) goto L2; + + Similarly, replace LE/LEU by LT/LTU. */ + + switch (code) + { + case EQ: + case LT: case LTU: + case GE: case GEU: + break; + + case LE: case LEU: + case GT: case GTU: + code = avr_normalize_condition (code); + break; + + default: + return false; + } + + /* Wrap the branches into UNSPECs so they won't be changed or + optimized in the remainder. */ + + target = XEXP (XEXP (ifelse1, 1), 0); + cond = XEXP (ifelse1, 0); + jump = emit_jump_insn_after (gen_branch_unspec (target, cond), insn1); + + JUMP_LABEL (jump) = JUMP_LABEL (branch1); + + target = XEXP (XEXP (ifelse2, 1), 0); + cond = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx); + jump = emit_jump_insn_after (gen_branch_unspec (target, cond), insn2); + + JUMP_LABEL (jump) = JUMP_LABEL (branch2); + + /* The comparisons in insn1 and insn2 are exactly the same; + insn2 is superfluous so delete it. */ + + delete_insn (insn2); + delete_insn (branch1); + delete_insn (branch2); + + return true; +} + + +/* Implement `TARGET_MACHINE_DEPENDENT_REORG'. */ +/* Optimize conditional jumps. 
*/ + +static void +avr_reorg (void) +{ + rtx insn = get_insns(); + + for (insn = next_real_insn (insn); insn; insn = next_real_insn (insn)) + { + rtx pattern = avr_compare_pattern (insn); + + if (!pattern) + continue; + + if (optimize + && avr_reorg_remove_redundant_compare (insn)) + { + continue; + } + + if (compare_diff_p (insn)) + { + /* Now we work under compare insn with difficult branch. */ + + rtx next = next_real_insn (insn); + rtx pat = PATTERN (next); + + pattern = SET_SRC (pattern); + + if (true_regnum (XEXP (pattern, 0)) >= 0 + && true_regnum (XEXP (pattern, 1)) >= 0) + { + rtx x = XEXP (pattern, 0); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern, 0) = XEXP (pattern, 1); + XEXP (pattern, 1) = x; + INSN_CODE (next) = -1; + } + else if (true_regnum (XEXP (pattern, 0)) >= 0 + && XEXP (pattern, 1) == const0_rtx) + { + /* This is a tst insn, we can reverse it. */ + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern, 1) = XEXP (pattern, 0); + XEXP (pattern, 0) = const0_rtx; + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + else if (true_regnum (XEXP (pattern, 0)) >= 0 + && CONST_INT_P (XEXP (pattern, 1))) + { + rtx x = XEXP (pattern, 1); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + enum machine_mode mode = GET_MODE (XEXP (pattern, 0)); + + if (avr_simplify_comparison_p (mode, GET_CODE (t), x)) + { + XEXP (pattern, 1) = gen_int_mode (INTVAL (x) + 1, mode); + PUT_CODE (t, avr_normalize_condition (GET_CODE (t))); + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + } + } + } +} + +/* Returns register number for function return value.*/ + +static inline unsigned int +avr_ret_register (void) +{ + return 24; +} + + +/* Implement `TARGET_FUNCTION_VALUE_REGNO_P'. */ + +static bool +avr_function_value_regno_p (const unsigned int regno) +{ + return (regno == avr_ret_register ()); +} + + +/* Implement `TARGET_LIBCALL_VALUE'. */ +/* Create an RTX representing the place where a + library function returns a value of mode MODE. */ + +static rtx +avr_libcall_value (enum machine_mode mode, + const_rtx func ATTRIBUTE_UNUSED) +{ + int offs = GET_MODE_SIZE (mode); + + if (offs <= 4) + offs = (offs + 1) & ~1; + + return gen_rtx_REG (mode, avr_ret_register () + 2 - offs); +} + + +/* Implement `TARGET_FUNCTION_VALUE'. */ +/* Create an RTX representing the place where a + function returns a value of data type VALTYPE. */ + +static rtx +avr_function_value (const_tree type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + unsigned int offs; + + if (TYPE_MODE (type) != BLKmode) + return avr_libcall_value (TYPE_MODE (type), NULL_RTX); + + offs = int_size_in_bytes (type); + if (offs < 2) + offs = 2; + if (offs > 2 && offs < GET_MODE_SIZE (SImode)) + offs = GET_MODE_SIZE (SImode); + else if (offs > GET_MODE_SIZE (SImode) && offs < GET_MODE_SIZE (DImode)) + offs = GET_MODE_SIZE (DImode); + + return gen_rtx_REG (BLKmode, avr_ret_register () + 2 - offs); +} + +int +test_hard_reg_class (enum reg_class rclass, rtx x) +{ + int regno = true_regnum (x); + if (regno < 0) + return 0; + + if (TEST_HARD_REG_CLASS (rclass, regno)) + return 1; + + return 0; +} + + +/* Helper for jump_over_one_insn_p: Test if INSN is a 2-word instruction + and thus is suitable to be skipped by CPSE, SBRC, etc. 
*/
+
+static bool
+avr_2word_insn_p (rtx insn)
+{
+  if (avr_current_device->errata_skip
+      || !insn
+      || 2 != get_attr_length (insn))
+    {
+      return false;
+    }
+
+  switch (INSN_CODE (insn))
+    {
+    default:
+      return false;
+
+    case CODE_FOR_movqi_insn:
+    case CODE_FOR_movuqq_insn:
+    case CODE_FOR_movqq_insn:
+      {
+        rtx set = single_set (insn);
+        rtx src = SET_SRC (set);
+        rtx dest = SET_DEST (set);
+
+        /* Factor out LDS and STS from movqi_insn. */
+
+        if (MEM_P (dest)
+            && (REG_P (src) || src == CONST0_RTX (GET_MODE (dest))))
+          {
+            return CONSTANT_ADDRESS_P (XEXP (dest, 0));
+          }
+        else if (REG_P (dest)
+                 && MEM_P (src))
+          {
+            return CONSTANT_ADDRESS_P (XEXP (src, 0));
+          }
+
+        return false;
+      }
+
+    case CODE_FOR_call_insn:
+    case CODE_FOR_call_value_insn:
+      return true;
+    }
+}
+
+
+int
+jump_over_one_insn_p (rtx insn, rtx dest)
+{
+  int uid = INSN_UID (GET_CODE (dest) == LABEL_REF
+                      ? XEXP (dest, 0)
+                      : dest);
+  int jump_addr = INSN_ADDRESSES (INSN_UID (insn));
+  int dest_addr = INSN_ADDRESSES (uid);
+  int jump_offset = dest_addr - jump_addr - get_attr_length (insn);
+
+  return (jump_offset == 1
+          || (jump_offset == 2
+              && avr_2word_insn_p (next_active_insn (insn))));
+}
+
+
+/* Worker function for `HARD_REGNO_MODE_OK'. */
+/* Returns 1 if a value of mode MODE can be stored starting with hard
+   register number REGNO. On the enhanced core, anything larger than
+   1 byte must start in an even-numbered register for "movw" to work
+   (this way we don't have to check for odd registers everywhere). */
+
+int
+avr_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+  /* NOTE: 8-bit values must not be disallowed for R28 or R29.
+     Disallowing QI et al. in these regs might lead to code like
+     (set (subreg:QI (reg:HI 28) n) ...)
+     which will result in wrong code because reload does not
+     handle SUBREGs of hard registers like this.
+     This could be fixed in reload. However, it appears
+     that fixing reload is not wanted by reload people. */
+
+  /* Any GENERAL_REGS register can hold 8-bit values. */
+
+  if (GET_MODE_SIZE (mode) == 1)
+    return 1;
+
+  /* FIXME: Ideally, the following test is not needed.
+     However, it turned out that it can reduce the number
+     of spill fails. AVR and its poor endowment with
+     address registers is an extreme stress test for reload. */
+
+  if (GET_MODE_SIZE (mode) >= 4
+      && regno >= REG_X)
+    return 0;
+
+  /* All modes larger than 8 bits should start in an even register. */
+
+  return !(regno & 1);
+}
+
+
+/* Implement `HARD_REGNO_CALL_PART_CLOBBERED'. */
+
+int
+avr_hard_regno_call_part_clobbered (unsigned regno, enum machine_mode mode)
+{
+  /* FIXME: This hook gets called with MODE:REGNO combinations that don't
+     represent valid hard registers like, e.g. HI:29. Returning TRUE
+     for such registers can lead to performance degradation as mentioned
+     in PR53595. Thus, report invalid hard registers as FALSE. */
+
+  if (!avr_hard_regno_mode_ok (regno, mode))
+    return 0;
+
+  /* Return true if any of the following boundaries is crossed:
+     17/18, 27/28 and 29/30. */
+
+  return ((regno < 18 && regno + GET_MODE_SIZE (mode) > 18)
+          || (regno < REG_Y && regno + GET_MODE_SIZE (mode) > REG_Y)
+          || (regno < REG_Z && regno + GET_MODE_SIZE (mode) > REG_Z));
+}
+
+
+/* Implement `MODE_CODE_BASE_REG_CLASS'. */
+
+enum reg_class
+avr_mode_code_base_reg_class (enum machine_mode mode ATTRIBUTE_UNUSED,
+                              addr_space_t as, RTX_CODE outer_code,
+                              RTX_CODE index_code ATTRIBUTE_UNUSED)
+{
+  if (!ADDR_SPACE_GENERIC_P (as))
+    {
+      return POINTER_Z_REGS;
+    }
+
+  if (!avr_strict_X)
+    return reload_completed ?
BASE_POINTER_REGS : POINTER_REGS; + + return PLUS == outer_code ? BASE_POINTER_REGS : POINTER_REGS; +} + + +/* Implement `REGNO_MODE_CODE_OK_FOR_BASE_P'. */ + +bool +avr_regno_mode_code_ok_for_base_p (int regno, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + RTX_CODE outer_code, + RTX_CODE index_code ATTRIBUTE_UNUSED) +{ + bool ok = false; + + if (!ADDR_SPACE_GENERIC_P (as)) + { + if (regno < FIRST_PSEUDO_REGISTER + && regno == REG_Z) + { + return true; + } + + if (reg_renumber) + { + regno = reg_renumber[regno]; + + if (regno == REG_Z) + { + return true; + } + } + + return false; + } + + if (regno < FIRST_PSEUDO_REGISTER + && (regno == REG_X + || regno == REG_Y + || regno == REG_Z + || regno == ARG_POINTER_REGNUM)) + { + ok = true; + } + else if (reg_renumber) + { + regno = reg_renumber[regno]; + + if (regno == REG_X + || regno == REG_Y + || regno == REG_Z + || regno == ARG_POINTER_REGNUM) + { + ok = true; + } + } + + if (avr_strict_X + && PLUS == outer_code + && regno == REG_X) + { + ok = false; + } + + return ok; +} + + +/* A helper for `output_reload_insisf' and `output_reload_inhi'. */ +/* Set 32-bit register OP[0] to compile-time constant OP[1]. + CLOBBER_REG is a QI clobber register or NULL_RTX. + LEN == NULL: output instructions. + LEN != NULL: set *LEN to the length of the instruction sequence + (in words) printed with LEN = NULL. + If CLEAR_P is true, OP[0] had been cleard to Zero already. + If CLEAR_P is false, nothing is known about OP[0]. + + The effect on cc0 is as follows: + + Load 0 to any register except ZERO_REG : NONE + Load ld register with any value : NONE + Anything else: : CLOBBER */ + +static void +output_reload_in_const (rtx *op, rtx clobber_reg, int *len, bool clear_p) +{ + rtx src = op[1]; + rtx dest = op[0]; + rtx xval, xdest[4]; + int ival[4]; + int clobber_val = 1234; + bool cooked_clobber_p = false; + bool set_p = false; + enum machine_mode mode = GET_MODE (dest); + int n, n_bytes = GET_MODE_SIZE (mode); + + gcc_assert (REG_P (dest) + && CONSTANT_P (src)); + + if (len) + *len = 0; + + /* (REG:SI 14) is special: It's neither in LD_REGS nor in NO_LD_REGS + but has some subregs that are in LD_REGS. Use the MSB (REG:QI 17). */ + + if (REGNO (dest) < 16 + && REGNO (dest) + GET_MODE_SIZE (mode) > 16) + { + clobber_reg = all_regs_rtx[REGNO (dest) + n_bytes - 1]; + } + + /* We might need a clobber reg but don't have one. Look at the value to + be loaded more closely. A clobber is only needed if it is a symbol + or contains a byte that is neither 0, -1 or a power of 2. */ + + if (NULL_RTX == clobber_reg + && !test_hard_reg_class (LD_REGS, dest) + && (! (CONST_INT_P (src) || CONST_FIXED_P (src) || CONST_DOUBLE_P (src)) + || !avr_popcount_each_byte (src, n_bytes, + (1 << 0) | (1 << 1) | (1 << 8)))) + { + /* We have no clobber register but need one. Cook one up. + That's cheaper than loading from constant pool. */ + + cooked_clobber_p = true; + clobber_reg = all_regs_rtx[REG_Z + 1]; + avr_asm_len ("mov __tmp_reg__,%0", &clobber_reg, len, 1); + } + + /* Now start filling DEST from LSB to MSB. */ + + for (n = 0; n < n_bytes; n++) + { + int ldreg_p; + bool done_byte = false; + int j; + rtx xop[3]; + + /* Crop the n-th destination byte. 
*/ + + xdest[n] = simplify_gen_subreg (QImode, dest, mode, n); + ldreg_p = test_hard_reg_class (LD_REGS, xdest[n]); + + if (!CONST_INT_P (src) + && !CONST_FIXED_P (src) + && !CONST_DOUBLE_P (src)) + { + static const char* const asm_code[][2] = + { + { "ldi %2,lo8(%1)" CR_TAB "mov %0,%2", "ldi %0,lo8(%1)" }, + { "ldi %2,hi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hi8(%1)" }, + { "ldi %2,hlo8(%1)" CR_TAB "mov %0,%2", "ldi %0,hlo8(%1)" }, + { "ldi %2,hhi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hhi8(%1)" } + }; + + xop[0] = xdest[n]; + xop[1] = src; + xop[2] = clobber_reg; + + avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2); + + continue; + } + + /* Crop the n-th source byte. */ + + xval = simplify_gen_subreg (QImode, src, mode, n); + ival[n] = INTVAL (xval); + + /* Look if we can reuse the low word by means of MOVW. */ + + if (n == 2 + && n_bytes >= 4 + && AVR_HAVE_MOVW) + { + rtx lo16 = simplify_gen_subreg (HImode, src, mode, 0); + rtx hi16 = simplify_gen_subreg (HImode, src, mode, 2); + + if (INTVAL (lo16) == INTVAL (hi16)) + { + if (0 != INTVAL (lo16) + || !clear_p) + { + avr_asm_len ("movw %C0,%A0", &op[0], len, 1); + } + + break; + } + } + + /* Don't use CLR so that cc0 is set as expected. */ + + if (ival[n] == 0) + { + if (!clear_p) + avr_asm_len (ldreg_p ? "ldi %0,0" + : ZERO_REGNO == REGNO (xdest[n]) ? "clr %0" + : "mov %0,__zero_reg__", + &xdest[n], len, 1); + continue; + } + + if (clobber_val == ival[n] + && REGNO (clobber_reg) == REGNO (xdest[n])) + { + continue; + } + + /* LD_REGS can use LDI to move a constant value */ + + if (ldreg_p) + { + xop[0] = xdest[n]; + xop[1] = xval; + avr_asm_len ("ldi %0,lo8(%1)", xop, len, 1); + continue; + } + + /* Try to reuse value already loaded in some lower byte. */ + + for (j = 0; j < n; j++) + if (ival[j] == ival[n]) + { + xop[0] = xdest[n]; + xop[1] = xdest[j]; + + avr_asm_len ("mov %0,%1", xop, len, 1); + done_byte = true; + break; + } + + if (done_byte) + continue; + + /* Need no clobber reg for -1: Use CLR/DEC */ + + if (-1 == ival[n]) + { + if (!clear_p) + avr_asm_len ("clr %0", &xdest[n], len, 1); + + avr_asm_len ("dec %0", &xdest[n], len, 1); + continue; + } + else if (1 == ival[n]) + { + if (!clear_p) + avr_asm_len ("clr %0", &xdest[n], len, 1); + + avr_asm_len ("inc %0", &xdest[n], len, 1); + continue; + } + + /* Use T flag or INC to manage powers of 2 if we have + no clobber reg. */ + + if (NULL_RTX == clobber_reg + && single_one_operand (xval, QImode)) + { + xop[0] = xdest[n]; + xop[1] = GEN_INT (exact_log2 (ival[n] & GET_MODE_MASK (QImode))); + + gcc_assert (constm1_rtx != xop[1]); + + if (!set_p) + { + set_p = true; + avr_asm_len ("set", xop, len, 1); + } + + if (!clear_p) + avr_asm_len ("clr %0", xop, len, 1); + + avr_asm_len ("bld %0,%1", xop, len, 1); + continue; + } + + /* We actually need the LD_REGS clobber reg. */ + + gcc_assert (NULL_RTX != clobber_reg); + + xop[0] = xdest[n]; + xop[1] = xval; + xop[2] = clobber_reg; + clobber_val = ival[n]; + + avr_asm_len ("ldi %2,lo8(%1)" CR_TAB + "mov %0,%2", xop, len, 2); + } + + /* If we cooked up a clobber reg above, restore it. */ + + if (cooked_clobber_p) + { + avr_asm_len ("mov %0,__tmp_reg__", &clobber_reg, len, 1); + } +} + + +/* Reload the constant OP[1] into the HI register OP[0]. + CLOBBER_REG is a QI clobber reg needed to move vast majority of consts + into a NO_LD_REGS register. If CLOBBER_REG is NULL_RTX we either don't + need a clobber reg or have to cook one up. + + PLEN == NULL: Output instructions. + PLEN != NULL: Output nothing. 
Set *PLEN to number of words occupied + by the insns printed. + + Return "". */ + +const char* +output_reload_inhi (rtx *op, rtx clobber_reg, int *plen) +{ + output_reload_in_const (op, clobber_reg, plen, false); + return ""; +} + + +/* Reload a SI or SF compile time constant OP[1] into the register OP[0]. + CLOBBER_REG is a QI clobber reg needed to move vast majority of consts + into a NO_LD_REGS register. If CLOBBER_REG is NULL_RTX we either don't + need a clobber reg or have to cook one up. + + LEN == NULL: Output instructions. + + LEN != NULL: Output nothing. Set *LEN to number of words occupied + by the insns printed. + + Return "". */ + +const char * +output_reload_insisf (rtx *op, rtx clobber_reg, int *len) +{ + if (AVR_HAVE_MOVW + && !test_hard_reg_class (LD_REGS, op[0]) + && (CONST_INT_P (op[1]) + || CONST_FIXED_P (op[1]) + || CONST_DOUBLE_P (op[1]))) + { + int len_clr, len_noclr; + + /* In some cases it is better to clear the destination beforehand, e.g. + + CLR R2 CLR R3 MOVW R4,R2 INC R2 + + is shorther than + + CLR R2 INC R2 CLR R3 CLR R4 CLR R5 + + We find it too tedious to work that out in the print function. + Instead, we call the print function twice to get the lengths of + both methods and use the shortest one. */ + + output_reload_in_const (op, clobber_reg, &len_clr, true); + output_reload_in_const (op, clobber_reg, &len_noclr, false); + + if (len_noclr - len_clr == 4) + { + /* Default needs 4 CLR instructions: clear register beforehand. */ + + avr_asm_len ("mov %A0,__zero_reg__" CR_TAB + "mov %B0,__zero_reg__" CR_TAB + "movw %C0,%A0", &op[0], len, 3); + + output_reload_in_const (op, clobber_reg, len, true); + + if (len) + *len += 3; + + return ""; + } + } + + /* Default: destination not pre-cleared. */ + + output_reload_in_const (op, clobber_reg, len, false); + return ""; +} + +const char* +avr_out_reload_inpsi (rtx *op, rtx clobber_reg, int *len) +{ + output_reload_in_const (op, clobber_reg, len, false); + return ""; +} + + +/* Worker function for `ASM_OUTPUT_ADDR_VEC_ELT'. */ + +void +avr_output_addr_vec_elt (FILE *stream, int value) +{ + if (AVR_HAVE_JMP_CALL) + fprintf (stream, "\t.word gs(.L%d)\n", value); + else + fprintf (stream, "\trjmp .L%d\n", value); +} + + +/* Implement `TARGET_HARD_REGNO_SCRATCH_OK'. */ +/* Returns true if SCRATCH are safe to be allocated as a scratch + registers (for a define_peephole2) in the current function. */ + +static bool +avr_hard_regno_scratch_ok (unsigned int regno) +{ + /* Interrupt functions can only use registers that have already been saved + by the prologue, even if they would normally be call-clobbered. */ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (regno)) + return false; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (regno == REG_Y || regno == REG_Y + 1)) + { + return false; + } + + return true; +} + + +/* Worker function for `HARD_REGNO_RENAME_OK'. */ +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +avr_hard_regno_rename_ok (unsigned int old_reg, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. 
*/ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (new_reg)) + return 0; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (old_reg == REG_Y || old_reg == REG_Y + 1 + || new_reg == REG_Y || new_reg == REG_Y + 1)) + { + return 0; + } + + return 1; +} + +/* Output a branch that tests a single bit of a register (QI, HI, SI or DImode) + or memory location in the I/O space (QImode only). + + Operand 0: comparison operator (must be EQ or NE, compare bit to zero). + Operand 1: register operand to test, or CONST_INT memory address. + Operand 2: bit number. + Operand 3: label to jump to if the test is true. */ + +const char* +avr_out_sbxx_branch (rtx insn, rtx operands[]) +{ + enum rtx_code comp = GET_CODE (operands[0]); + bool long_jump = get_attr_length (insn) >= 4; + bool reverse = long_jump || jump_over_one_insn_p (insn, operands[3]); + + if (comp == GE) + comp = EQ; + else if (comp == LT) + comp = NE; + + if (reverse) + comp = reverse_condition (comp); + + switch (GET_CODE (operands[1])) + { + default: + gcc_unreachable(); + + case CONST_INT: + + if (low_io_address_operand (operands[1], QImode)) + { + if (comp == EQ) + output_asm_insn ("sbis %i1,%2", operands); + else + output_asm_insn ("sbic %i1,%2", operands); + } + else + { + output_asm_insn ("in __tmp_reg__,%i1", operands); + if (comp == EQ) + output_asm_insn ("sbrs __tmp_reg__,%2", operands); + else + output_asm_insn ("sbrc __tmp_reg__,%2", operands); + } + + break; /* CONST_INT */ + + case REG: + + if (comp == EQ) + output_asm_insn ("sbrs %T1%T2", operands); + else + output_asm_insn ("sbrc %T1%T2", operands); + + break; /* REG */ + } /* switch */ + + if (long_jump) + return ("rjmp .+4" CR_TAB + "jmp %x3"); + + if (!reverse) + return "rjmp %x3"; + + return ""; +} + +/* Worker function for `TARGET_ASM_CONSTRUCTOR'. */ + +static void +avr_asm_out_ctor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_ctors\n", asm_out_file); + default_ctor_section_asm_out_constructor (symbol, priority); +} + + +/* Worker function for `TARGET_ASM_DESTRUCTOR'. */ + +static void +avr_asm_out_dtor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_dtors\n", asm_out_file); + default_dtor_section_asm_out_destructor (symbol, priority); +} + + +/* Worker function for `TARGET_RETURN_IN_MEMORY'. */ + +static bool +avr_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (TYPE_MODE (type) == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } + else + return false; +} + + +/* Implement `CASE_VALUES_THRESHOLD'. */ +/* Supply the default for --param case-values-threshold=0 */ + +static unsigned int +avr_case_values_threshold (void) +{ + /* The exact break-even point between a jump table and an if-else tree + depends on several factors not available here like, e.g. if 8-bit + comparisons can be used in the if-else tree or not, on the + range of the case values, if the case value can be reused, on the + register allocation, etc. '7' appears to be a good choice. */ + + return 7; +} + + +/* Implement `TARGET_ADDR_SPACE_ADDRESS_MODE'. */ + +static enum machine_mode +avr_addr_space_address_mode (addr_space_t as) +{ + return avr_addrspace[as].pointer_size == 3 ? 
PSImode : HImode; +} + + +/* Implement `TARGET_ADDR_SPACE_POINTER_MODE'. */ + +static enum machine_mode +avr_addr_space_pointer_mode (addr_space_t as) +{ + return avr_addr_space_address_mode (as); +} + + +/* Helper for following function. */ + +static bool +avr_reg_ok_for_pgm_addr (rtx reg, bool strict) +{ + gcc_assert (REG_P (reg)); + + if (strict) + { + return REGNO (reg) == REG_Z; + } + + /* Avoid combine to propagate hard regs. */ + + if (can_create_pseudo_p() + && REGNO (reg) < REG_Z) + { + return false; + } + + return true; +} + + +/* Implement `TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P'. */ + +static bool +avr_addr_space_legitimate_address_p (enum machine_mode mode, rtx x, + bool strict, addr_space_t as) +{ + bool ok = false; + + switch (as) + { + default: + gcc_unreachable(); + + case ADDR_SPACE_GENERIC: + return avr_legitimate_address_p (mode, x, strict); + + case ADDR_SPACE_FLASH: + case ADDR_SPACE_FLASH1: + case ADDR_SPACE_FLASH2: + case ADDR_SPACE_FLASH3: + case ADDR_SPACE_FLASH4: + case ADDR_SPACE_FLASH5: + + switch (GET_CODE (x)) + { + case REG: + ok = avr_reg_ok_for_pgm_addr (x, strict); + break; + + case POST_INC: + ok = avr_reg_ok_for_pgm_addr (XEXP (x, 0), strict); + break; + + default: + break; + } + + break; /* FLASH */ + + case ADDR_SPACE_MEMX: + if (REG_P (x)) + ok = (!strict + && can_create_pseudo_p()); + + if (LO_SUM == GET_CODE (x)) + { + rtx hi = XEXP (x, 0); + rtx lo = XEXP (x, 1); + + ok = (REG_P (hi) + && (!strict || REGNO (hi) < FIRST_PSEUDO_REGISTER) + && REG_P (lo) + && REGNO (lo) == REG_Z); + } + + break; /* MEMX */ + } + + if (avr_log.legitimate_address_p) + { + avr_edump ("\n%?: ret=%b, mode=%m strict=%d " + "reload_completed=%d reload_in_progress=%d %s:", + ok, mode, strict, reload_completed, reload_in_progress, + reg_renumber ? "(reg_renumber)" : ""); + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && IN_RANGE (INTVAL (XEXP (x, 1)), 0, MAX_LD_OFFSET (mode)) + && reg_renumber) + { + avr_edump ("(r%d ---> r%d)", REGNO (XEXP (x, 0)), + true_regnum (XEXP (x, 0))); + } + + avr_edump ("\n%r\n", x); + } + + return ok; +} + + +/* Implement `TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS'. */ + +static rtx +avr_addr_space_legitimize_address (rtx x, rtx old_x, + enum machine_mode mode, addr_space_t as) +{ + if (ADDR_SPACE_GENERIC_P (as)) + return avr_legitimize_address (x, old_x, mode); + + if (avr_log.legitimize_address) + { + avr_edump ("\n%?: mode=%m\n %r\n", mode, old_x); + } + + return old_x; +} + + +/* Implement `TARGET_ADDR_SPACE_CONVERT'. */ + +static rtx +avr_addr_space_convert (rtx src, tree type_from, tree type_to) +{ + addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (type_from)); + addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (type_to)); + + if (avr_log.progmem) + avr_edump ("\n%!: op = %r\nfrom = %t\nto = %t\n", + src, type_from, type_to); + + /* Up-casting from 16-bit to 24-bit pointer. */ + + if (as_from != ADDR_SPACE_MEMX + && as_to == ADDR_SPACE_MEMX) + { + int msb; + rtx sym = src; + rtx reg = gen_reg_rtx (PSImode); + + while (CONST == GET_CODE (sym) || PLUS == GET_CODE (sym)) + sym = XEXP (sym, 0); + + /* Look at symbol flags: avr_encode_section_info set the flags + also if attribute progmem was seen so that we get the right + promotion for, e.g. PSTR-like strings that reside in generic space + but are located in flash. In that case we patch the incoming + address space. 
*/
+
+      if (SYMBOL_REF == GET_CODE (sym)
+          && ADDR_SPACE_FLASH == AVR_SYMBOL_GET_ADDR_SPACE (sym))
+        {
+          as_from = ADDR_SPACE_FLASH;
+        }
+
+      /* Linearize memory: RAM has bit 23 set. */
+
+      msb = ADDR_SPACE_GENERIC_P (as_from)
+        ? 0x80
+        : avr_addrspace[as_from].segment;
+
+      src = force_reg (Pmode, src);
+
+      emit_insn (msb == 0
+                 ? gen_zero_extendhipsi2 (reg, src)
+                 : gen_n_extendhipsi2 (reg, gen_int_mode (msb, QImode), src));
+
+      return reg;
+    }
+
+  /* Down-casting from 24-bit to 16-bit throws away the high byte. */
+
+  if (as_from == ADDR_SPACE_MEMX
+      && as_to != ADDR_SPACE_MEMX)
+    {
+      rtx new_src = gen_reg_rtx (Pmode);
+
+      src = force_reg (PSImode, src);
+
+      emit_move_insn (new_src,
+                      simplify_gen_subreg (Pmode, src, PSImode, 0));
+      return new_src;
+    }
+
+  return src;
+}
+
+
+/* Implement `TARGET_ADDR_SPACE_SUBSET_P'. */
+
+static bool
+avr_addr_space_subset_p (addr_space_t subset ATTRIBUTE_UNUSED,
+                         addr_space_t superset ATTRIBUTE_UNUSED)
+{
+  /* Allow any kind of pointer mess. */
+
+  return true;
+}
+
+
+/* Implement `TARGET_CONVERT_TO_TYPE'. */
+
+static tree
+avr_convert_to_type (tree type, tree expr)
+{
+  /* Print a diagnostic for a pointer conversion that changes the address
+     space of the pointer target to a non-enclosing address space,
+     provided -Waddr-space-convert is on.
+
+     FIXME: Filter out cases where the target object is known to
+     be located in the right memory, like in
+
+     (const __flash*) PSTR ("text")
+
+     Also try to distinguish between explicit casts requested by
+     the user and implicit casts like
+
+     void f (const __flash char*);
+
+     void g (const char *p)
+     {
+       f ((const __flash*) p);
+     }
+
+     under the assumption that an explicit cast means that the user
+     knows what he is doing, e.g. interface with PSTR or old style
+     code with progmem and pgm_read_xxx.
+  */
+
+  if (avr_warn_addr_space_convert
+      && expr != error_mark_node
+      && POINTER_TYPE_P (type)
+      && POINTER_TYPE_P (TREE_TYPE (expr)))
+    {
+      addr_space_t as_old = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (expr)));
+      addr_space_t as_new = TYPE_ADDR_SPACE (TREE_TYPE (type));
+
+      if (avr_log.progmem)
+        avr_edump ("%?: type = %t\nexpr = %t\n\n", type, expr);
+
+      if (as_new != ADDR_SPACE_MEMX
+          && as_new != as_old)
+        {
+          location_t loc = EXPR_LOCATION (expr);
+          const char *name_old = avr_addrspace[as_old].name;
+          const char *name_new = avr_addrspace[as_new].name;
+
+          warning (OPT_Waddr_space_convert,
+                   "conversion from address space %qs to address space %qs",
+                   ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
+                   ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
+
+          return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
+        }
+    }
+
+  return NULL_TREE;
+}
+
+
+/* Worker function for movmemhi expander.
+   XOP[0] Destination as MEM:BLK
+   XOP[1] Source " "
+   XOP[2] # Bytes to copy
+
+   Return TRUE if the expansion is accomplished.
+   Return FALSE if the operand combination is not supported. */
+
+bool
+avr_emit_movmemhi (rtx *xop)
+{
+  HOST_WIDE_INT count;
+  enum machine_mode loop_mode;
+  addr_space_t as = MEM_ADDR_SPACE (xop[1]);
+  rtx loop_reg, addr1, a_src, a_dest, insn, xas;
+  rtx a_hi8 = NULL_RTX;
+
+  if (avr_mem_flash_p (xop[0]))
+    return false;
+
+  if (!CONST_INT_P (xop[2]))
+    return false;
+
+  count = INTVAL (xop[2]);
+  if (count <= 0)
+    return false;
+
+  a_src = XEXP (xop[1], 0);
+  a_dest = XEXP (xop[0], 0);
+
+  if (PSImode == GET_MODE (a_src))
+    {
+      gcc_assert (as == ADDR_SPACE_MEMX);
+
+      loop_mode = (count < 0x100) ?
QImode : HImode; + loop_reg = gen_rtx_REG (loop_mode, 24); + emit_move_insn (loop_reg, gen_int_mode (count, loop_mode)); + + addr1 = simplify_gen_subreg (HImode, a_src, PSImode, 0); + a_hi8 = simplify_gen_subreg (QImode, a_src, PSImode, 2); + } + else + { + int segment = avr_addrspace[as].segment; + + if (segment + && avr_current_device->n_flash > 1) + { + a_hi8 = GEN_INT (segment); + emit_move_insn (rampz_rtx, a_hi8 = copy_to_mode_reg (QImode, a_hi8)); + } + else if (!ADDR_SPACE_GENERIC_P (as)) + { + as = ADDR_SPACE_FLASH; + } + + addr1 = a_src; + + loop_mode = (count <= 0x100) ? QImode : HImode; + loop_reg = copy_to_mode_reg (loop_mode, gen_int_mode (count, loop_mode)); + } + + xas = GEN_INT (as); + + /* FIXME: Register allocator might come up with spill fails if it is left + on its own. Thus, we allocate the pointer registers by hand: + Z = source address + X = destination address */ + + emit_move_insn (lpm_addr_reg_rtx, addr1); + emit_move_insn (gen_rtx_REG (HImode, REG_X), a_dest); + + /* FIXME: Register allocator does a bad job and might spill address + register(s) inside the loop leading to additional move instruction + to/from stack which could clobber tmp_reg. Thus, do *not* emit + load and store as separate insns. Instead, we perform the copy + by means of one monolithic insn. */ + + gcc_assert (TMP_REGNO == LPM_REGNO); + + if (as != ADDR_SPACE_MEMX) + { + /* Load instruction ([E]LPM or LD) is known at compile time: + Do the copy-loop inline. */ + + rtx (*fun) (rtx, rtx, rtx) + = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi; + + insn = fun (xas, loop_reg, loop_reg); + } + else + { + rtx (*fun) (rtx, rtx) + = QImode == loop_mode ? gen_movmemx_qi : gen_movmemx_hi; + + emit_move_insn (gen_rtx_REG (QImode, 23), a_hi8); + + insn = fun (xas, GEN_INT (avr_addr.rampz)); + } + + set_mem_addr_space (SET_SRC (XVECEXP (insn, 0, 0)), as); + emit_insn (insn); + + return true; +} + + +/* Print assembler for movmem_qi, movmem_hi insns... 
+ $0 : Address Space + $1, $2 : Loop register + Z : Source address + X : Destination address +*/ + +const char* +avr_out_movmem (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen) +{ + addr_space_t as = (addr_space_t) INTVAL (op[0]); + enum machine_mode loop_mode = GET_MODE (op[1]); + bool sbiw_p = test_hard_reg_class (ADDW_REGS, op[1]); + rtx xop[3]; + + if (plen) + *plen = 0; + + xop[0] = op[0]; + xop[1] = op[1]; + xop[2] = tmp_reg_rtx; + + /* Loop label */ + + avr_asm_len ("0:", xop, plen, 0); + + /* Load with post-increment */ + + switch (as) + { + default: + gcc_unreachable(); + + case ADDR_SPACE_GENERIC: + + avr_asm_len ("ld %2,Z+", xop, plen, 1); + break; + + case ADDR_SPACE_FLASH: + + if (AVR_HAVE_LPMX) + avr_asm_len ("lpm %2,Z+", xop, plen, 1); + else + avr_asm_len ("lpm" CR_TAB + "adiw r30,1", xop, plen, 2); + break; + + case ADDR_SPACE_FLASH1: + case ADDR_SPACE_FLASH2: + case ADDR_SPACE_FLASH3: + case ADDR_SPACE_FLASH4: + case ADDR_SPACE_FLASH5: + + if (AVR_HAVE_ELPMX) + avr_asm_len ("elpm %2,Z+", xop, plen, 1); + else + avr_asm_len ("elpm" CR_TAB + "adiw r30,1", xop, plen, 2); + break; + } + + /* Store with post-increment */ + + avr_asm_len ("st X+,%2", xop, plen, 1); + + /* Decrement loop-counter and set Z-flag */ + + if (QImode == loop_mode) + { + avr_asm_len ("dec %1", xop, plen, 1); + } + else if (sbiw_p) + { + avr_asm_len ("sbiw %1,1", xop, plen, 1); + } + else + { + avr_asm_len ("subi %A1,1" CR_TAB + "sbci %B1,0", xop, plen, 2); + } + + /* Loop until zero */ + + return avr_asm_len ("brne 0b", xop, plen, 1); +} + + + +/* Helper for __builtin_avr_delay_cycles */ + +static rtx +avr_mem_clobber (void) +{ + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + return mem; +} + +static void +avr_expand_delay_cycles (rtx operands0) +{ + unsigned HOST_WIDE_INT cycles = UINTVAL (operands0) & GET_MODE_MASK (SImode); + unsigned HOST_WIDE_INT cycles_used; + unsigned HOST_WIDE_INT loop_count; + + if (IN_RANGE (cycles, 83886082, 0xFFFFFFFF)) + { + loop_count = ((cycles - 9) / 6) + 1; + cycles_used = ((loop_count - 1) * 6) + 9; + emit_insn (gen_delay_cycles_4 (gen_int_mode (loop_count, SImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 262145, 83886081)) + { + loop_count = ((cycles - 7) / 5) + 1; + if (loop_count > 0xFFFFFF) + loop_count = 0xFFFFFF; + cycles_used = ((loop_count - 1) * 5) + 7; + emit_insn (gen_delay_cycles_3 (gen_int_mode (loop_count, SImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 768, 262144)) + { + loop_count = ((cycles - 5) / 4) + 1; + if (loop_count > 0xFFFF) + loop_count = 0xFFFF; + cycles_used = ((loop_count - 1) * 4) + 5; + emit_insn (gen_delay_cycles_2 (gen_int_mode (loop_count, HImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 6, 767)) + { + loop_count = cycles / 3; + if (loop_count > 255) + loop_count = 255; + cycles_used = loop_count * 3; + emit_insn (gen_delay_cycles_1 (gen_int_mode (loop_count, QImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + while (cycles >= 2) + { + emit_insn (gen_nopv (GEN_INT(2))); + cycles -= 2; + } + + if (cycles == 1) + { + emit_insn (gen_nopv (GEN_INT(1))); + cycles--; + } +} + + +/* Compute the image of x under f, i.e. perform x --> f(x) */ + +static int +avr_map (unsigned int f, int x) +{ + return x < 8 ? (f >> (4 * x)) & 0xf : 0; +} + + +/* Return some metrics of map A. */ + +enum + { + /* Number of fixed points in { 0 ... 
7 } */ + MAP_FIXED_0_7, + + /* Size of preimage of non-fixed points in { 0 ... 7 } */ + MAP_NONFIXED_0_7, + + /* Mask representing the fixed points in { 0 ... 7 } */ + MAP_MASK_FIXED_0_7, + + /* Size of the preimage of { 0 ... 7 } */ + MAP_PREIMAGE_0_7, + + /* Mask that represents the preimage of { f } */ + MAP_MASK_PREIMAGE_F + }; + +static unsigned +avr_map_metric (unsigned int a, int mode) +{ + unsigned i, metric = 0; + + for (i = 0; i < 8; i++) + { + unsigned ai = avr_map (a, i); + + if (mode == MAP_FIXED_0_7) + metric += ai == i; + else if (mode == MAP_NONFIXED_0_7) + metric += ai < 8 && ai != i; + else if (mode == MAP_MASK_FIXED_0_7) + metric |= ((unsigned) (ai == i)) << i; + else if (mode == MAP_PREIMAGE_0_7) + metric += ai < 8; + else if (mode == MAP_MASK_PREIMAGE_F) + metric |= ((unsigned) (ai == 0xf)) << i; + else + gcc_unreachable(); + } + + return metric; +} + + +/* Return true if IVAL has a 0xf in its hexadecimal representation + and false, otherwise. Only nibbles 0..7 are taken into account. + Used as constraint helper for C0f and Cxf. */ + +bool +avr_has_nibble_0xf (rtx ival) +{ + unsigned int map = UINTVAL (ival) & GET_MODE_MASK (SImode); + return 0 != avr_map_metric (map, MAP_MASK_PREIMAGE_F); +} + + +/* We have a set of bits that are mapped by a function F. + Try to decompose F by means of a second function G so that + + F = F o G^-1 o G + + and + + cost (F o G^-1) + cost (G) < cost (F) + + Example: Suppose builtin insert_bits supplies us with the map + F = 0x3210ffff. Instead of doing 4 bit insertions to get the high + nibble of the result, we can just as well rotate the bits before inserting + them and use the map 0x7654ffff which is cheaper than the original map. + For this example G = G^-1 = 0x32107654 and F o G^-1 = 0x7654ffff. */ + +typedef struct +{ + /* tree code of binary function G */ + enum tree_code code; + + /* The constant second argument of G */ + int arg; + + /* G^-1, the inverse of G (*, arg) */ + unsigned ginv; + + /* The cost of appplying G (*, arg) */ + int cost; + + /* The composition F o G^-1 (*, arg) for some function F */ + unsigned int map; + + /* For debug purpose only */ + const char *str; +} avr_map_op_t; + +static const avr_map_op_t avr_map_op[] = + { + { LROTATE_EXPR, 0, 0x76543210, 0, 0, "id" }, + { LROTATE_EXPR, 1, 0x07654321, 2, 0, "<<<" }, + { LROTATE_EXPR, 2, 0x10765432, 4, 0, "<<<" }, + { LROTATE_EXPR, 3, 0x21076543, 4, 0, "<<<" }, + { LROTATE_EXPR, 4, 0x32107654, 1, 0, "<<<" }, + { LROTATE_EXPR, 5, 0x43210765, 3, 0, "<<<" }, + { LROTATE_EXPR, 6, 0x54321076, 5, 0, "<<<" }, + { LROTATE_EXPR, 7, 0x65432107, 3, 0, "<<<" }, + { RSHIFT_EXPR, 1, 0x6543210c, 1, 0, ">>" }, + { RSHIFT_EXPR, 1, 0x7543210c, 1, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x543210cc, 2, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x643210cc, 2, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x743210cc, 2, 0, ">>" }, + { LSHIFT_EXPR, 1, 0xc7654321, 1, 0, "<<" }, + { LSHIFT_EXPR, 2, 0xcc765432, 2, 0, "<<" } + }; + + +/* Try to decompose F as F = (F o G^-1) o G as described above. + The result is a struct representing F o G^-1 and G. + If result.cost < 0 then such a decomposition does not exist. 
*/ + +static avr_map_op_t +avr_map_decompose (unsigned int f, const avr_map_op_t *g, bool val_const_p) +{ + int i; + bool val_used_p = 0 != avr_map_metric (f, MAP_MASK_PREIMAGE_F); + avr_map_op_t f_ginv = *g; + unsigned int ginv = g->ginv; + + f_ginv.cost = -1; + + /* Step 1: Computing F o G^-1 */ + + for (i = 7; i >= 0; i--) + { + int x = avr_map (f, i); + + if (x <= 7) + { + x = avr_map (ginv, x); + + /* The bit is no element of the image of G: no avail (cost = -1) */ + + if (x > 7) + return f_ginv; + } + + f_ginv.map = (f_ginv.map << 4) + x; + } + + /* Step 2: Compute the cost of the operations. + The overall cost of doing an operation prior to the insertion is + the cost of the insertion plus the cost of the operation. */ + + /* Step 2a: Compute cost of F o G^-1 */ + + if (0 == avr_map_metric (f_ginv.map, MAP_NONFIXED_0_7)) + { + /* The mapping consists only of fixed points and can be folded + to AND/OR logic in the remainder. Reasonable cost is 3. */ + + f_ginv.cost = 2 + (val_used_p && !val_const_p); + } + else + { + rtx xop[4]; + + /* Get the cost of the insn by calling the output worker with some + fake values. Mimic effect of reloading xop[3]: Unused operands + are mapped to 0 and used operands are reloaded to xop[0]. */ + + xop[0] = all_regs_rtx[24]; + xop[1] = gen_int_mode (f_ginv.map, SImode); + xop[2] = all_regs_rtx[25]; + xop[3] = val_used_p ? xop[0] : const0_rtx; + + avr_out_insert_bits (xop, &f_ginv.cost); + + f_ginv.cost += val_const_p && val_used_p ? 1 : 0; + } + + /* Step 2b: Add cost of G */ + + f_ginv.cost += g->cost; + + if (avr_log.builtin) + avr_edump (" %s%d=%d", g->str, g->arg, f_ginv.cost); + + return f_ginv; +} + + +/* Insert bits from XOP[1] into XOP[0] according to MAP. + XOP[0] and XOP[1] don't overlap. + If FIXP_P = true: Move all bits according to MAP using BLD/BST sequences. + If FIXP_P = false: Just move the bit if its position in the destination + is different to its source position. */ + +static void +avr_move_bits (rtx *xop, unsigned int map, bool fixp_p, int *plen) +{ + int bit_dest, b; + + /* T-flag contains this bit of the source, i.e. of XOP[1] */ + int t_bit_src = -1; + + /* We order the operations according to the requested source bit b. */ + + for (b = 0; b < 8; b++) + for (bit_dest = 0; bit_dest < 8; bit_dest++) + { + int bit_src = avr_map (map, bit_dest); + + if (b != bit_src + || bit_src >= 8 + /* Same position: No need to copy as requested by FIXP_P. */ + || (bit_dest == bit_src && !fixp_p)) + continue; + + if (t_bit_src != bit_src) + { + /* Source bit is not yet in T: Store it to T. */ + + t_bit_src = bit_src; + + xop[3] = GEN_INT (bit_src); + avr_asm_len ("bst %T1%T3", xop, plen, 1); + } + + /* Load destination bit with T. */ + + xop[3] = GEN_INT (bit_dest); + avr_asm_len ("bld %T0%T3", xop, plen, 1); + } +} + + +/* PLEN == 0: Print assembler code for `insert_bits'. + PLEN != 0: Compute code length in bytes. + + OP[0]: Result + OP[1]: The mapping composed of nibbles. If nibble no. N is + 0: Bit N of result is copied from bit OP[2].0 + ... ... 
+ 7: Bit N of result is copied from bit OP[2].7 + 0xf: Bit N of result is copied from bit OP[3].N + OP[2]: Bits to be inserted + OP[3]: Target value */ + +const char* +avr_out_insert_bits (rtx *op, int *plen) +{ + unsigned int map = UINTVAL (op[1]) & GET_MODE_MASK (SImode); + unsigned mask_fixed; + bool fixp_p = true; + rtx xop[4]; + + xop[0] = op[0]; + xop[1] = op[2]; + xop[2] = op[3]; + + gcc_assert (REG_P (xop[2]) || CONST_INT_P (xop[2])); + + if (plen) + *plen = 0; + else if (flag_print_asm_name) + fprintf (asm_out_file, ASM_COMMENT_START "map = 0x%08x\n", map); + + /* If MAP has fixed points it might be better to initialize the result + with the bits to be inserted instead of moving all bits by hand. */ + + mask_fixed = avr_map_metric (map, MAP_MASK_FIXED_0_7); + + if (REGNO (xop[0]) == REGNO (xop[1])) + { + /* Avoid early-clobber conflicts */ + + avr_asm_len ("mov __tmp_reg__,%1", xop, plen, 1); + xop[1] = tmp_reg_rtx; + fixp_p = false; + } + + if (avr_map_metric (map, MAP_MASK_PREIMAGE_F)) + { + /* XOP[2] is used and reloaded to XOP[0] already */ + + int n_fix = 0, n_nofix = 0; + + gcc_assert (REG_P (xop[2])); + + /* Get the code size of the bit insertions; once with all bits + moved and once with fixed points omitted. */ + + avr_move_bits (xop, map, true, &n_fix); + avr_move_bits (xop, map, false, &n_nofix); + + if (fixp_p && n_fix - n_nofix > 3) + { + xop[3] = gen_int_mode (~mask_fixed, QImode); + + avr_asm_len ("eor %0,%1" CR_TAB + "andi %0,%3" CR_TAB + "eor %0,%1", xop, plen, 3); + fixp_p = false; + } + } + else + { + /* XOP[2] is unused */ + + if (fixp_p && mask_fixed) + { + avr_asm_len ("mov %0,%1", xop, plen, 1); + fixp_p = false; + } + } + + /* Move/insert remaining bits. */ + + avr_move_bits (xop, map, fixp_p, plen); + + return ""; +} + + +/* IDs for all the AVR builtins. */ + +enum avr_builtin_id + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + AVR_BUILTIN_ ## NAME, +#include "builtins.def" +#undef DEF_BUILTIN + + AVR_BUILTIN_COUNT + }; + +struct GTY(()) avr_builtin_description +{ + enum insn_code icode; + int n_args; + tree fndecl; +}; + + +/* Notice that avr_bdesc[] and avr_builtin_id are initialized in such a way + that a built-in's ID can be used to access the built-in by means of + avr_bdesc[ID] */ + +static GTY(()) struct avr_builtin_description +avr_bdesc[AVR_BUILTIN_COUNT] = + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME) \ + { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE }, +#include "builtins.def" +#undef DEF_BUILTIN + }; + + +/* Implement `TARGET_BUILTIN_DECL'. */ + +static tree +avr_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (id < AVR_BUILTIN_COUNT) + return avr_bdesc[id].fndecl; + + return error_mark_node; +} + + +static void +avr_init_builtin_int24 (void) +{ + tree int24_type = make_signed_type (GET_MODE_BITSIZE (PSImode)); + tree uint24_type = make_unsigned_type (GET_MODE_BITSIZE (PSImode)); + + lang_hooks.types.register_builtin_type (int24_type, "__int24"); + lang_hooks.types.register_builtin_type (uint24_type, "__uint24"); +} + + +/* Implement `TARGET_INIT_BUILTINS' */ +/* Set up all builtin functions for this target. 
*/ + +static void +avr_init_builtins (void) +{ + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree uchar_ftype_uchar + = build_function_type_list (unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree uint_ftype_uchar_uchar + = build_function_type_list (unsigned_type_node, + unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree int_ftype_char_char + = build_function_type_list (integer_type_node, + char_type_node, + char_type_node, + NULL_TREE); + tree int_ftype_char_uchar + = build_function_type_list (integer_type_node, + char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree void_ftype_ulong + = build_function_type_list (void_type_node, + long_unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_ulong_uchar_uchar + = build_function_type_list (unsigned_char_type_node, + long_unsigned_type_node, + unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + + tree const_memx_void_node + = build_qualified_type (void_type_node, + TYPE_QUAL_CONST + | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_MEMX)); + + tree const_memx_ptr_type_node + = build_pointer_type_for_mode (const_memx_void_node, PSImode, false); + + tree char_ftype_const_memx_ptr + = build_function_type_list (char_type_node, + const_memx_ptr_type_node, + NULL); + +#define ITYP(T) \ + lang_hooks.types.type_for_size (TYPE_PRECISION (T), TYPE_UNSIGNED (T)) + +#define FX_FTYPE_FX(fx) \ + tree fx##r_ftype_##fx##r \ + = build_function_type_list (node_##fx##r, node_##fx##r, NULL); \ + tree fx##k_ftype_##fx##k \ + = build_function_type_list (node_##fx##k, node_##fx##k, NULL) + +#define FX_FTYPE_FX_INT(fx) \ + tree fx##r_ftype_##fx##r_int \ + = build_function_type_list (node_##fx##r, node_##fx##r, \ + integer_type_node, NULL); \ + tree fx##k_ftype_##fx##k_int \ + = build_function_type_list (node_##fx##k, node_##fx##k, \ + integer_type_node, NULL) + +#define INT_FTYPE_FX(fx) \ + tree int_ftype_##fx##r \ + = build_function_type_list (integer_type_node, node_##fx##r, NULL); \ + tree int_ftype_##fx##k \ + = build_function_type_list (integer_type_node, node_##fx##k, NULL) + +#define INTX_FTYPE_FX(fx) \ + tree int##fx##r_ftype_##fx##r \ + = build_function_type_list (ITYP (node_##fx##r), node_##fx##r, NULL); \ + tree int##fx##k_ftype_##fx##k \ + = build_function_type_list (ITYP (node_##fx##k), node_##fx##k, NULL) + +#define FX_FTYPE_INTX(fx) \ + tree fx##r_ftype_int##fx##r \ + = build_function_type_list (node_##fx##r, ITYP (node_##fx##r), NULL); \ + tree fx##k_ftype_int##fx##k \ + = build_function_type_list (node_##fx##k, ITYP (node_##fx##k), NULL) + + tree node_hr = short_fract_type_node; + tree node_nr = fract_type_node; + tree node_lr = long_fract_type_node; + tree node_llr = long_long_fract_type_node; + + tree node_uhr = unsigned_short_fract_type_node; + tree node_unr = unsigned_fract_type_node; + tree node_ulr = unsigned_long_fract_type_node; + tree node_ullr = unsigned_long_long_fract_type_node; + + tree node_hk = short_accum_type_node; + tree node_nk = accum_type_node; + tree node_lk = long_accum_type_node; + tree node_llk = long_long_accum_type_node; + + tree node_uhk = unsigned_short_accum_type_node; + tree node_unk = unsigned_accum_type_node; + tree node_ulk = unsigned_long_accum_type_node; + tree node_ullk = unsigned_long_long_accum_type_node; + + + /* For absfx builtins. */ + + FX_FTYPE_FX (h); + FX_FTYPE_FX (n); + FX_FTYPE_FX (l); + FX_FTYPE_FX (ll); + + /* For roundfx builtins. 
*/ + + FX_FTYPE_FX_INT (h); + FX_FTYPE_FX_INT (n); + FX_FTYPE_FX_INT (l); + FX_FTYPE_FX_INT (ll); + + FX_FTYPE_FX_INT (uh); + FX_FTYPE_FX_INT (un); + FX_FTYPE_FX_INT (ul); + FX_FTYPE_FX_INT (ull); + + /* For countlsfx builtins. */ + + INT_FTYPE_FX (h); + INT_FTYPE_FX (n); + INT_FTYPE_FX (l); + INT_FTYPE_FX (ll); + + INT_FTYPE_FX (uh); + INT_FTYPE_FX (un); + INT_FTYPE_FX (ul); + INT_FTYPE_FX (ull); + + /* For bitsfx builtins. */ + + INTX_FTYPE_FX (h); + INTX_FTYPE_FX (n); + INTX_FTYPE_FX (l); + INTX_FTYPE_FX (ll); + + INTX_FTYPE_FX (uh); + INTX_FTYPE_FX (un); + INTX_FTYPE_FX (ul); + INTX_FTYPE_FX (ull); + + /* For fxbits builtins. */ + + FX_FTYPE_INTX (h); + FX_FTYPE_INTX (n); + FX_FTYPE_INTX (l); + FX_FTYPE_INTX (ll); + + FX_FTYPE_INTX (uh); + FX_FTYPE_INTX (un); + FX_FTYPE_INTX (ul); + FX_FTYPE_INTX (ull); + + +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + { \ + int id = AVR_BUILTIN_ ## NAME; \ + const char *Name = "__builtin_avr_" #NAME; \ + char *name = (char*) alloca (1 + strlen (Name)); \ + \ + gcc_assert (id < AVR_BUILTIN_COUNT); \ + avr_bdesc[id].fndecl \ + = add_builtin_function (avr_tolower (name, Name), TYPE, id, \ + BUILT_IN_MD, LIBNAME, NULL_TREE); \ + } +#include "builtins.def" +#undef DEF_BUILTIN + + avr_init_builtin_int24 (); +} + + +/* Subroutine of avr_expand_builtin to expand vanilla builtins + with non-void result and 1 ... 3 arguments. */ + +static rtx +avr_default_expand_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, xop[3]; + int n, n_args = call_expr_nargs (exp); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + + gcc_assert (n_args >= 1 && n_args <= 3); + + if (target == NULL_RTX + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + { + target = gen_reg_rtx (tmode); + } + + for (n = 0; n < n_args; n++) + { + tree arg = CALL_EXPR_ARG (exp, n); + rtx op = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode opmode = GET_MODE (op); + enum machine_mode mode = insn_data[icode].operand[n+1].mode; + + if ((opmode == SImode || opmode == VOIDmode) && mode == HImode) + { + opmode = HImode; + op = gen_lowpart (HImode, op); + } + + /* In case the insn wants input operands in modes different from + the result, abort. */ + + gcc_assert (opmode == mode || opmode == VOIDmode); + + if (!insn_data[icode].operand[n+1].predicate (op, mode)) + op = copy_to_mode_reg (mode, op); + + xop[n] = op; + } + + switch (n_args) + { + case 1: pat = GEN_FCN (icode) (target, xop[0]); break; + case 2: pat = GEN_FCN (icode) (target, xop[0], xop[1]); break; + case 3: pat = GEN_FCN (icode) (target, xop[0], xop[1], xop[2]); break; + + default: + gcc_unreachable(); + } + + if (pat == NULL_RTX) + return NULL_RTX; + + emit_insn (pat); + + return target; +} + + +/* Implement `TARGET_EXPAND_BUILTIN'. */ +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
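A usage note for the DELAY_CYCLES case handled in avr_expand_builtin below (sketch, not part of the patch; only meaningful when compiling with avr-gcc): the argument has to fold to a compile-time constant.

    void
    wait_1000_cycles (void)
    {
      __builtin_avr_delay_cycles (1000);   /* OK: integer literal */
    }

Passing a run-time value instead is rejected by the expander below with the "expects a compile time integer constant" error.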
*/ + +static rtx +avr_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + unsigned int id = DECL_FUNCTION_CODE (fndecl); + const struct avr_builtin_description *d = &avr_bdesc[id]; + tree arg0; + rtx op0; + + gcc_assert (id < AVR_BUILTIN_COUNT); + + switch (id) + { + case AVR_BUILTIN_NOP: + emit_insn (gen_nopv (GEN_INT(1))); + return 0; + + case AVR_BUILTIN_DELAY_CYCLES: + { + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + if (!CONST_INT_P (op0)) + error ("%s expects a compile time integer constant", bname); + else + avr_expand_delay_cycles (op0); + + return NULL_RTX; + } + + case AVR_BUILTIN_INSERT_BITS: + { + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + if (!CONST_INT_P (op0)) + { + error ("%s expects a compile time long integer constant" + " as first argument", bname); + return target; + } + + break; + } + + case AVR_BUILTIN_ROUNDHR: case AVR_BUILTIN_ROUNDUHR: + case AVR_BUILTIN_ROUNDR: case AVR_BUILTIN_ROUNDUR: + case AVR_BUILTIN_ROUNDLR: case AVR_BUILTIN_ROUNDULR: + case AVR_BUILTIN_ROUNDLLR: case AVR_BUILTIN_ROUNDULLR: + + case AVR_BUILTIN_ROUNDHK: case AVR_BUILTIN_ROUNDUHK: + case AVR_BUILTIN_ROUNDK: case AVR_BUILTIN_ROUNDUK: + case AVR_BUILTIN_ROUNDLK: case AVR_BUILTIN_ROUNDULK: + case AVR_BUILTIN_ROUNDLLK: case AVR_BUILTIN_ROUNDULLK: + + /* Warn about odd rounding. Rounding points >= FBIT will have + no effect. */ + + if (TREE_CODE (CALL_EXPR_ARG (exp, 1)) != INTEGER_CST) + break; + + int rbit = (int) TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1)); + + if (rbit >= (int) GET_MODE_FBIT (mode)) + { + warning (OPT_Wextra, "rounding to %d bits has no effect for " + "fixed-point value with %d fractional bits", + rbit, GET_MODE_FBIT (mode)); + + return expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, mode, + EXPAND_NORMAL); + } + else if (rbit <= - (int) GET_MODE_IBIT (mode)) + { + warning (0, "rounding result will always be 0"); + return CONST0_RTX (mode); + } + + /* The rounding points RP satisfies now: -IBIT < RP < FBIT. + + TR 18037 only specifies results for RP > 0. However, the + remaining cases of -IBIT < RP <= 0 can easily be supported + without any additional overhead. */ + + break; /* round */ + } + + /* No fold found and no insn: Call support function from libgcc. */ + + if (d->icode == CODE_FOR_nothing + && DECL_ASSEMBLER_NAME (get_callee_fndecl (exp)) != NULL_TREE) + { + return expand_call (exp, target, ignore); + } + + /* No special treatment needed: vanilla expand. */ + + gcc_assert (d->icode != CODE_FOR_nothing); + gcc_assert (d->n_args == call_expr_nargs (exp)); + + if (d->n_args == 0) + { + emit_insn ((GEN_FCN (d->icode)) (target)); + return NULL_RTX; + } + + return avr_default_expand_builtin (d->icode, exp, target); +} + + +/* Helper for `avr_fold_builtin' that folds absfx (FIXED_CST). */ + +static tree +avr_fold_absfx (tree tval) +{ + if (FIXED_CST != TREE_CODE (tval)) + return NULL_TREE; + + /* Our fixed-points have no padding: Use double_int payload directly. */ + + FIXED_VALUE_TYPE fval = TREE_FIXED_CST (tval); + unsigned int bits = GET_MODE_BITSIZE (fval.mode); + double_int ival = fval.data.sext (bits); + + if (!ival.is_negative()) + return tval; + + /* ISO/IEC TR 18037, 7.18a.6.2: The absfx functions are saturating. 
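The saturation rule cited above can be pictured on a raw two's-complement payload; a sketch with a 16-bit payload (the real code below operates on the double_int payload of the FIXED_CST, not on int16_t):

    #include <stdint.h>

    /* abs() on the payload, saturating: |most negative| maps to the
       maximum value instead of overflowing back to itself.  */
    static int16_t
    abs_payload_saturating (int16_t x)
    {
      if (x == INT16_MIN)
        return INT16_MAX;                  /* saturate          */
      return x < 0 ? (int16_t) -x : x;     /* ordinary absolute */
    }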
*/ + + fval.data = (ival == double_int::min_value (bits, false).sext (bits)) + ? double_int::max_value (bits, false) + : -ival; + + return build_fixed (TREE_TYPE (tval), fval); +} + + +/* Implement `TARGET_FOLD_BUILTIN'. */ + +static tree +avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree val_type = TREE_TYPE (TREE_TYPE (fndecl)); + + if (!optimize) + return NULL_TREE; + + switch (fcode) + { + default: + break; + + case AVR_BUILTIN_SWAP: + { + return fold_build2 (LROTATE_EXPR, val_type, arg[0], + build_int_cst (val_type, 4)); + } + + case AVR_BUILTIN_ABSHR: + case AVR_BUILTIN_ABSR: + case AVR_BUILTIN_ABSLR: + case AVR_BUILTIN_ABSLLR: + + case AVR_BUILTIN_ABSHK: + case AVR_BUILTIN_ABSK: + case AVR_BUILTIN_ABSLK: + case AVR_BUILTIN_ABSLLK: + /* GCC is not good with folding ABS for fixed-point. Do it by hand. */ + + return avr_fold_absfx (arg[0]); + + case AVR_BUILTIN_BITSHR: case AVR_BUILTIN_HRBITS: + case AVR_BUILTIN_BITSHK: case AVR_BUILTIN_HKBITS: + case AVR_BUILTIN_BITSUHR: case AVR_BUILTIN_UHRBITS: + case AVR_BUILTIN_BITSUHK: case AVR_BUILTIN_UHKBITS: + + case AVR_BUILTIN_BITSR: case AVR_BUILTIN_RBITS: + case AVR_BUILTIN_BITSK: case AVR_BUILTIN_KBITS: + case AVR_BUILTIN_BITSUR: case AVR_BUILTIN_URBITS: + case AVR_BUILTIN_BITSUK: case AVR_BUILTIN_UKBITS: + + case AVR_BUILTIN_BITSLR: case AVR_BUILTIN_LRBITS: + case AVR_BUILTIN_BITSLK: case AVR_BUILTIN_LKBITS: + case AVR_BUILTIN_BITSULR: case AVR_BUILTIN_ULRBITS: + case AVR_BUILTIN_BITSULK: case AVR_BUILTIN_ULKBITS: + + case AVR_BUILTIN_BITSLLR: case AVR_BUILTIN_LLRBITS: + case AVR_BUILTIN_BITSLLK: case AVR_BUILTIN_LLKBITS: + case AVR_BUILTIN_BITSULLR: case AVR_BUILTIN_ULLRBITS: + case AVR_BUILTIN_BITSULLK: case AVR_BUILTIN_ULLKBITS: + + gcc_assert (TYPE_PRECISION (val_type) + == TYPE_PRECISION (TREE_TYPE (arg[0]))); + + return build1 (VIEW_CONVERT_EXPR, val_type, arg[0]); + + case AVR_BUILTIN_INSERT_BITS: + { + tree tbits = arg[1]; + tree tval = arg[2]; + tree tmap; + tree map_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + unsigned int map; + bool changed = false; + unsigned i; + avr_map_op_t best_g; + + if (TREE_CODE (arg[0]) != INTEGER_CST) + { + /* No constant as first argument: Don't fold this and run into + error in avr_expand_builtin. */ + + break; + } + + tmap = double_int_to_tree (map_type, tree_to_double_int (arg[0])); + map = TREE_INT_CST_LOW (tmap); + + if (TREE_CODE (tval) != INTEGER_CST + && 0 == avr_map_metric (map, MAP_MASK_PREIMAGE_F)) + { + /* There are no F in the map, i.e. 3rd operand is unused. + Replace that argument with some constant to render + respective input unused. */ + + tval = build_int_cst (val_type, 0); + changed = true; + } + + if (TREE_CODE (tbits) != INTEGER_CST + && 0 == avr_map_metric (map, MAP_PREIMAGE_0_7)) + { + /* Similar for the bits to be inserted. If they are unused, + we can just as well pass 0. */ + + tbits = build_int_cst (val_type, 0); + } + + if (TREE_CODE (tbits) == INTEGER_CST) + { + /* Inserting bits known at compile time is easy and can be + performed by AND and OR with appropriate masks. 
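A worked instance of that folding (illustration only, numbers made up): with map = 0xffffff10 and a constant bits = 0x02, the loop that follows yields mask_ior = 0x02 and mask_and = 0xfe, so the builtin folds to (val | 0x02) & 0xfe.

    /* Standalone sketch mirroring the loop below.  */
    static void
    mask_example (void)
    {
      unsigned long map = 0xffffff10;   /* result bits 0 and 1 come from BITS */
      int bits = 0x02;
      int mask_ior = 0, mask_and = 0xff;

      for (int i = 0; i < 8; i++)
        {
          int mi = (map >> (4 * i)) & 0xf;   /* nibble i of the map */
          if (mi < 8)
            {
              if (bits & (1 << mi)) mask_ior |= 1 << i;    /* constant 1 bit */
              else                  mask_and &= ~(1 << i); /* constant 0 bit */
            }
        }
      /* Here mask_ior == 0x02 and mask_and == 0xfe.  */
    }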
*/ + + int bits = TREE_INT_CST_LOW (tbits); + int mask_ior = 0, mask_and = 0xff; + + for (i = 0; i < 8; i++) + { + int mi = avr_map (map, i); + + if (mi < 8) + { + if (bits & (1 << mi)) mask_ior |= (1 << i); + else mask_and &= ~(1 << i); + } + } + + tval = fold_build2 (BIT_IOR_EXPR, val_type, tval, + build_int_cst (val_type, mask_ior)); + return fold_build2 (BIT_AND_EXPR, val_type, tval, + build_int_cst (val_type, mask_and)); + } + + if (changed) + return build_call_expr (fndecl, 3, tmap, tbits, tval); + + /* If bits don't change their position we can use vanilla logic + to merge the two arguments. */ + + if (0 == avr_map_metric (map, MAP_NONFIXED_0_7)) + { + int mask_f = avr_map_metric (map, MAP_MASK_PREIMAGE_F); + tree tres, tmask = build_int_cst (val_type, mask_f ^ 0xff); + + tres = fold_build2 (BIT_XOR_EXPR, val_type, tbits, tval); + tres = fold_build2 (BIT_AND_EXPR, val_type, tres, tmask); + return fold_build2 (BIT_XOR_EXPR, val_type, tres, tval); + } + + /* Try to decomposing map to reduce overall cost. */ + + if (avr_log.builtin) + avr_edump ("\n%?: %x\n%?: ROL cost: ", map); + + best_g = avr_map_op[0]; + best_g.cost = 1000; + + for (i = 0; i < sizeof (avr_map_op) / sizeof (*avr_map_op); i++) + { + avr_map_op_t g + = avr_map_decompose (map, avr_map_op + i, + TREE_CODE (tval) == INTEGER_CST); + + if (g.cost >= 0 && g.cost < best_g.cost) + best_g = g; + } + + if (avr_log.builtin) + avr_edump ("\n"); + + if (best_g.arg == 0) + /* No optimization found */ + break; + + /* Apply operation G to the 2nd argument. */ + + if (avr_log.builtin) + avr_edump ("%?: using OP(%s%d, %x) cost %d\n", + best_g.str, best_g.arg, best_g.map, best_g.cost); + + /* Do right-shifts arithmetically: They copy the MSB instead of + shifting in a non-usable value (0) as with logic right-shift. */ + + tbits = fold_convert (signed_char_type_node, tbits); + tbits = fold_build2 (best_g.code, signed_char_type_node, tbits, + build_int_cst (val_type, best_g.arg)); + tbits = fold_convert (val_type, tbits); + + /* Use map o G^-1 instead of original map to undo the effect of G. */ + + tmap = double_int_to_tree (map_type, + double_int::from_uhwi (best_g.map)); + + return build_call_expr (fndecl, 3, tmap, tbits, tval); + } /* AVR_BUILTIN_INSERT_BITS */ + } + + return NULL_TREE; +} + + + +/* Initialize the GCC target structure. 
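One note before the target hooks start: the "vanilla logic" used in avr_fold_builtin above is the classic masked-merge identity. A minimal sketch (not part of the patch):

    /* For each bit, take BITS where MASK is 1 and VAL where MASK is 0:
       ((bits ^ val) & mask) ^ val.  This is the BIT_XOR / BIT_AND /
       BIT_XOR sequence built at the tree level above.  */
    static unsigned char
    merge_bits (unsigned char bits, unsigned char val, unsigned char mask)
    {
      return (unsigned char) (((bits ^ val) & mask) ^ val);
    }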
*/ + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER avr_assemble_integer +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START avr_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END avr_file_end + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE avr_asm_function_end_prologue +#undef TARGET_ASM_FUNCTION_BEGIN_EPILOGUE +#define TARGET_ASM_FUNCTION_BEGIN_EPILOGUE avr_asm_function_begin_epilogue + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE avr_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE avr_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P avr_function_value_regno_p + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE avr_attribute_table +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES avr_insert_attributes +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS avr_section_type_flags + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION avr_asm_named_section +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS avr_asm_init_sections +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO avr_encode_section_info +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION avr_asm_select_section + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST avr_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST avr_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS avr_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST avr_address_cost +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG avr_reorg +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG avr_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE avr_function_arg_advance + +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION avr_set_current_function + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY avr_return_in_memory + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + +#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE +#define TARGET_BUILTIN_SETJMP_FRAME_VALUE avr_builtin_setjmp_frame_value + +#undef TARGET_HARD_REGNO_SCRATCH_OK +#define TARGET_HARD_REGNO_SCRATCH_OK avr_hard_regno_scratch_ok +#undef TARGET_CASE_VALUES_THRESHOLD +#define TARGET_CASE_VALUES_THRESHOLD avr_case_values_threshold + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED avr_frame_pointer_required_p +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE avr_can_eliminate + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS avr_allocate_stack_slots_for_args + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN avr_warn_func_return + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P avr_class_likely_spilled_p + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE avr_option_override + +#undef TARGET_CANNOT_MODIFY_JUMPS_P +#define TARGET_CANNOT_MODIFY_JUMPS_P 
avr_cannot_modify_jumps_p + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL avr_function_ok_for_sibcall + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS avr_init_builtins + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL avr_builtin_decl + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN avr_expand_builtin + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN avr_fold_builtin + +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION avr_asm_function_rodata_section + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P avr_scalar_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST avr_build_builtin_va_list + +#undef TARGET_FIXED_POINT_SUPPORTED_P +#define TARGET_FIXED_POINT_SUPPORTED_P hook_bool_void_true + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE avr_convert_to_type + +#undef TARGET_ADDR_SPACE_SUBSET_P +#define TARGET_ADDR_SPACE_SUBSET_P avr_addr_space_subset_p + +#undef TARGET_ADDR_SPACE_CONVERT +#define TARGET_ADDR_SPACE_CONVERT avr_addr_space_convert + +#undef TARGET_ADDR_SPACE_ADDRESS_MODE +#define TARGET_ADDR_SPACE_ADDRESS_MODE avr_addr_space_address_mode + +#undef TARGET_ADDR_SPACE_POINTER_MODE +#define TARGET_ADDR_SPACE_POINTER_MODE avr_addr_space_pointer_mode + +#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P +#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ + avr_addr_space_legitimate_address_p + +#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS +#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS avr_addr_space_legitimize_address + +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD avr_secondary_reload + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND avr_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS avr_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P avr_print_operand_punct_valid_p + +struct gcc_target targetm = TARGET_INITIALIZER; + + +#include "gt-avr.h" diff --git a/gcc-4.9/gcc/config/avr/avr.h b/gcc-4.9/gcc/config/avr/avr.h new file mode 100644 index 000000000..74be83c8a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.h @@ -0,0 +1,606 @@ +/* Definitions of target machine for GNU compiler, + for ATMEL AVR at90s8515, ATmega103/103L, ATmega603/603L microcontrollers. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +typedef struct +{ + /* Id of the address space as used in c_register_addr_space */ + unsigned char id; + + /* Flavour of memory: 0 = RAM, 1 = Flash */ + int memory_class; + + /* Width of pointer (in bytes) */ + int pointer_size; + + /* Name of the address space as visible to the user */ + const char *name; + + /* Segment (i.e. 64k memory chunk) number. */ + int segment; + + /* Section prefix, e.g. ".progmem1.data" */ + const char *section_name; +} avr_addrspace_t; + +extern const avr_addrspace_t avr_addrspace[]; + +/* Known address spaces */ + +enum + { + ADDR_SPACE_RAM, /* ADDR_SPACE_GENERIC */ + ADDR_SPACE_FLASH, + ADDR_SPACE_FLASH1, + ADDR_SPACE_FLASH2, + ADDR_SPACE_FLASH3, + ADDR_SPACE_FLASH4, + ADDR_SPACE_FLASH5, + ADDR_SPACE_MEMX, + /* Sentinel */ + ADDR_SPACE_COUNT + }; + +#define TARGET_CPU_CPP_BUILTINS() avr_cpu_cpp_builtins (pfile) + +#define AVR_HAVE_JMP_CALL (avr_current_arch->have_jmp_call) +#define AVR_HAVE_MUL (avr_current_arch->have_mul) +#define AVR_HAVE_MOVW (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_LPMX (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_ELPM (avr_current_arch->have_elpm) +#define AVR_HAVE_ELPMX (avr_current_arch->have_elpmx) +#define AVR_HAVE_RAMPD (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPX (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPY (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPZ (avr_current_arch->have_elpm \ + || avr_current_arch->have_rampd) +#define AVR_HAVE_EIJMP_EICALL (avr_current_arch->have_eijmp_eicall) + +/* Handling of 8-bit SP versus 16-bit SP is as follows: + + -msp8 is used internally to select the right multilib for targets with + 8-bit SP. -msp8 is set automatically by DRIVER_SELF_SPECS for devices + with 8-bit SP or by multilib generation machinery. If a frame pointer is + needed and SP is only 8 bits wide, SP is zero-extended to get FP. + + TARGET_TINY_STACK is triggered by -mtiny-stack which is a user option. + This option has no effect on multilib selection. It serves to save some + bytes on 16-bit SP devices by only changing SP_L and leaving SP_H alone. + + These two properties are reflected by built-in macros __AVR_SP8__ resp. + __AVR_HAVE_8BIT_SP__ and __AVR_HAVE_16BIT_SP__. During multilib generation + there is always __AVR_SP8__ == __AVR_HAVE_8BIT_SP__. */ + +#define AVR_HAVE_8BIT_SP \ + (avr_current_device->short_sp || TARGET_TINY_STACK || avr_sp8) + +#define AVR_HAVE_SPH (!avr_sp8) + +#define AVR_2_BYTE_PC (!AVR_HAVE_EIJMP_EICALL) +#define AVR_3_BYTE_PC (AVR_HAVE_EIJMP_EICALL) + +#define AVR_XMEGA (avr_current_arch->xmega_p) + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN 0 +#define WORDS_BIG_ENDIAN 0 + +#ifdef IN_LIBGCC2 +/* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). */ +#define UNITS_PER_WORD 4 +#else +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 1 +#endif + +#define POINTER_SIZE 16 + + +/* Maximum sized of reasonable data type + DImode or Dfmode ... */ +#define MAX_FIXED_MODE_SIZE 32 + +#define PARM_BOUNDARY 8 + +#define FUNCTION_BOUNDARY 8 + +#define EMPTY_FIELD_BOUNDARY 8 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 8 + +#define TARGET_VTABLE_ENTRY_ALIGN 8 + +#define STRICT_ALIGNMENT 0 + +#define INT_TYPE_SIZE (TARGET_INT8 ? 8 : 16) +#define SHORT_TYPE_SIZE (INT_TYPE_SIZE == 8 ? INT_TYPE_SIZE : 16) +#define LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 16 : 32) +#define LONG_LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 
32 : 64) +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 32 +#define LONG_DOUBLE_TYPE_SIZE 32 +#define LONG_LONG_ACCUM_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE (INT_TYPE_SIZE == 8 ? "long unsigned int" : "unsigned int") +#define PTRDIFF_TYPE (INT_TYPE_SIZE == 8 ? "long int" :"int") + +#define WCHAR_TYPE_SIZE 16 + +#define FIRST_PSEUDO_REGISTER 36 + +#define FIXED_REGISTERS {\ + 1,1,/* r0 r1 */\ + 0,0,/* r2 r3 */\ + 0,0,/* r4 r5 */\ + 0,0,/* r6 r7 */\ + 0,0,/* r8 r9 */\ + 0,0,/* r10 r11 */\ + 0,0,/* r12 r13 */\ + 0,0,/* r14 r15 */\ + 0,0,/* r16 r17 */\ + 0,0,/* r18 r19 */\ + 0,0,/* r20 r21 */\ + 0,0,/* r22 r23 */\ + 0,0,/* r24 r25 */\ + 0,0,/* r26 r27 */\ + 0,0,/* r28 r29 */\ + 0,0,/* r30 r31 */\ + 1,1,/* STACK */\ + 1,1 /* arg pointer */ } + +#define CALL_USED_REGISTERS { \ + 1,1,/* r0 r1 */ \ + 0,0,/* r2 r3 */ \ + 0,0,/* r4 r5 */ \ + 0,0,/* r6 r7 */ \ + 0,0,/* r8 r9 */ \ + 0,0,/* r10 r11 */ \ + 0,0,/* r12 r13 */ \ + 0,0,/* r14 r15 */ \ + 0,0,/* r16 r17 */ \ + 1,1,/* r18 r19 */ \ + 1,1,/* r20 r21 */ \ + 1,1,/* r22 r23 */ \ + 1,1,/* r24 r25 */ \ + 1,1,/* r26 r27 */ \ + 0,0,/* r28 r29 */ \ + 1,1,/* r30 r31 */ \ + 1,1,/* STACK */ \ + 1,1 /* arg pointer */ } + +#define REG_ALLOC_ORDER { \ + 24,25, \ + 18,19, \ + 20,21, \ + 22,23, \ + 30,31, \ + 26,27, \ + 28,29, \ + 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, \ + 0,1, \ + 32,33,34,35 \ + } + +#define ADJUST_REG_ALLOC_ORDER avr_adjust_reg_alloc_order() + + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) avr_hard_regno_mode_ok(REGNO, MODE) + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +enum reg_class { + NO_REGS, + R0_REG, /* r0 */ + POINTER_X_REGS, /* r26 - r27 */ + POINTER_Y_REGS, /* r28 - r29 */ + POINTER_Z_REGS, /* r30 - r31 */ + STACK_REG, /* STACK */ + BASE_POINTER_REGS, /* r28 - r31 */ + POINTER_REGS, /* r26 - r31 */ + ADDW_REGS, /* r24 - r31 */ + SIMPLE_LD_REGS, /* r16 - r23 */ + LD_REGS, /* r16 - r31 */ + NO_LD_REGS, /* r0 - r15 */ + GENERAL_REGS, /* r0 - r31 */ + ALL_REGS, LIM_REG_CLASSES +}; + + +#define N_REG_CLASSES (int)LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "NO_REGS", \ + "R0_REG", /* r0 */ \ + "POINTER_X_REGS", /* r26 - r27 */ \ + "POINTER_Y_REGS", /* r28 - r29 */ \ + "POINTER_Z_REGS", /* r30 - r31 */ \ + "STACK_REG", /* STACK */ \ + "BASE_POINTER_REGS", /* r28 - r31 */ \ + "POINTER_REGS", /* r26 - r31 */ \ + "ADDW_REGS", /* r24 - r31 */ \ + "SIMPLE_LD_REGS", /* r16 - r23 */ \ + "LD_REGS", /* r16 - r31 */ \ + "NO_LD_REGS", /* r0 - r15 */ \ + "GENERAL_REGS", /* r0 - r31 */ \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS { \ + {0x00000000,0x00000000}, /* NO_REGS */ \ + {0x00000001,0x00000000}, /* R0_REG */ \ + {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ + {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ + {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ + {0x00000000,0x00000003}, /* STACK_REG, STACK */ \ + {(3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* POINTER_REGS, r26 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \ + 0x00000000}, /* ADDW_REGS, r24 - r31 */ \ + {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \ + {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \ + 0x00000000}, /* LD_REGS, r16 - r31 */ \ + {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \ + {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - 
r31 */ \ + {0xffffffff,0x00000003} /* ALL_REGS */ \ +} + +#define REGNO_REG_CLASS(R) avr_regno_reg_class(R) + +#define MODE_CODE_BASE_REG_CLASS(mode, as, outer_code, index_code) \ + avr_mode_code_base_reg_class (mode, as, outer_code, index_code) + +#define INDEX_REG_CLASS NO_REGS + +#define REGNO_MODE_CODE_OK_FOR_BASE_P(num, mode, as, outer_code, index_code) \ + avr_regno_mode_code_ok_for_base_p (num, mode, as, outer_code, index_code) + +#define REGNO_OK_FOR_INDEX_P(NUM) 0 + +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ + avr_hard_regno_call_part_clobbered (REGNO, MODE) + +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +#define STACK_PUSH_CODE POST_DEC + +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET avr_starting_frame_offset() + +#define STACK_POINTER_OFFSET 1 + +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +#define STACK_BOUNDARY 8 + +#define STACK_POINTER_REGNUM 32 + +#define FRAME_POINTER_REGNUM REG_Y + +#define ARG_POINTER_REGNUM 34 + +#define STATIC_CHAIN_REGNUM 2 + +#define ELIMINABLE_REGS { \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + ,{FRAME_POINTER_REGNUM+1,STACK_POINTER_REGNUM+1}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = avr_initial_elimination_offset (FROM, TO) + +#define RETURN_ADDR_RTX(count, tem) avr_return_addr_rtx (count, tem) + +/* Don't use Push rounding. expr.c: emit_single_push_insn is broken + for POST_DEC targets (PR27386). */ +/*#define PUSH_ROUNDING(NPUSHED) (NPUSHED)*/ + +typedef struct avr_args +{ + /* # Registers available for passing */ + int nregs; + + /* Next available register number */ + int regno; +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + avr_init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL) + +#define FUNCTION_ARG_REGNO_P(r) avr_function_arg_regno_p(r) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define EPILOGUE_USES(REGNO) avr_epilogue_uses(REGNO) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \ + do { \ + rtx new_x = avr_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \ + ADDR_TYPE (TYPE), \ + IND_L, make_memloc); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ + } while (0) + +#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost + +#define SLOW_BYTE_ACCESS 0 + +#define NO_FUNCTION_CSE + +#define REGISTER_TARGET_PRAGMAS() \ + do { \ + avr_register_target_pragmas(); \ + } while (0) + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define the pseudo-ops used to switch to the .ctors and .dtors sections. + There are no shared libraries on this target, and these sections are + placed in the read-only program memory, so they are not writable. 
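Referring back to the REG_CLASS_CONTENTS initializer above: each entry is just an OR of 2-bit groups placed at the even register numbers. A sketch (not part of the patch) that rebuilds two of the masks from the register numbers used there (REG_W = 24, REG_X = 26, REG_Y = 28, REG_Z = 30, as in the avr.md constants further down):

    #define REG_W 24
    #define REG_X 26
    #define REG_Y 28
    #define REG_Z 30

    /* "3 << REG_n" marks the register pair (REG_n, REG_n + 1).  */
    static const unsigned long addw_regs =
      (3UL << REG_X) | (3UL << REG_Y) | (3UL << REG_Z) | (3UL << REG_W);
      /* == 0xff000000, i.e. r24 - r31 */
    static const unsigned long ld_regs =
      (3UL << REG_X) | (3UL << REG_Y) | (3UL << REG_Z) | (3UL << REG_W)
      | (0xffUL << 16);
      /* == 0xffff0000, i.e. r16 - r31 */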
*/ + +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"a\",@progbits" + +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"a\",@progbits" + +#define TARGET_ASM_CONSTRUCTOR avr_asm_out_ctor + +#define TARGET_ASM_DESTRUCTOR avr_asm_out_dtor + +#define SUPPORTS_INIT_PRIORITY 0 + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_COMMENT_START " ; " + +#define ASM_APP_ON "/* #APP */\n" + +#define ASM_APP_OFF "/* #NOAPP */\n" + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '\n' || ((C) == '$')) + +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGN) \ + avr_asm_output_aligned_decl_common (STREAM, DECL, NAME, SIZE, ALIGN, false) + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGN) \ + avr_asm_output_aligned_decl_common (STREAM, DECL, NAME, SIZE, ALIGN, true) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP ".global\t" + +#define SUPPORTS_WEAK 1 + +#define HAS_INIT_SECTION 1 + +#define REGISTER_NAMES { \ + "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r8","r9","r10","r11","r12","r13","r14","r15", \ + "r16","r17","r18","r19","r20","r21","r22","r23", \ + "r24","r25","r26","r27","r28","r29","r30","r31", \ + "__SP_L__","__SP_H__","argL","argH"} + +#define FINAL_PRESCAN_INSN(insn, operand, nop) \ + avr_final_prescan_insn (insn, operand,nop) + +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpush\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpop\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + avr_output_addr_vec_elt (STREAM, VALUE) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do { \ + if ((POWER) > 0) \ + fprintf (STREAM, "\t.p2align\t%d\n", POWER); \ + } while (0) + +#define CASE_VECTOR_MODE HImode + +#undef WORD_REGISTER_OPERATIONS + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define Pmode HImode + +#define FUNCTION_MODE HImode + +#define DOLLARS_IN_IDENTIFIERS 0 + +#define TRAMPOLINE_SIZE 4 + +/* Store in cc_status the expressions + that the condition codes will describe + after execution of an instruction whose pattern is EXP. + Do not alter them if the instruction would not alter the cc's. */ + +#define NOTICE_UPDATE_CC(EXP, INSN) avr_notice_update_cc (EXP, INSN) + +/* The add insns don't set overflow in a usable way. */ +#define CC_OVERFLOW_UNUSABLE 01000 +/* The mov,and,or,xor insns don't set carry. That's ok though as the + Z bit is all we need when doing unsigned comparisons on the result of + these insns (since they're always with 0). However, conditions.h has + CC_NO_OVERFLOW defined for this purpose. Rename it to something more + understandable. */ +#define CC_NO_CARRY CC_NO_OVERFLOW + + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. 
*/ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + fprintf (FILE, "/* profiler %d */", (LABELNO)) + +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ + (LENGTH = avr_adjust_insn_length (INSN, LENGTH)) + +extern const char *avr_device_to_as (int argc, const char **argv); +extern const char *avr_device_to_ld (int argc, const char **argv); +extern const char *avr_device_to_data_start (int argc, const char **argv); +extern const char *avr_device_to_startfiles (int argc, const char **argv); +extern const char *avr_device_to_devicelib (int argc, const char **argv); +extern const char *avr_device_to_sp8 (int argc, const char **argv); + +#define EXTRA_SPEC_FUNCTIONS \ + { "device_to_as", avr_device_to_as }, \ + { "device_to_ld", avr_device_to_ld }, \ + { "device_to_data_start", avr_device_to_data_start }, \ + { "device_to_startfile", avr_device_to_startfiles }, \ + { "device_to_devicelib", avr_device_to_devicelib }, \ + { "device_to_sp8", avr_device_to_sp8 }, + +#define DRIVER_SELF_SPECS " %:device_to_sp8(%{mmcu=*:%*}) " +#define CPP_SPEC "" + +#define CC1_SPEC "" + +#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \ + %{!fenforce-eh-specs:-fno-enforce-eh-specs} \ + %{!fexceptions:-fno-exceptions}" + +#define ASM_SPEC "%:device_to_as(%{mmcu=*:%*}) " + +#define LINK_SPEC "\ +%{mrelax:--relax\ + %{mpmem-wrap-around:%{mmcu=at90usb8*:--pmem-wrap-around=8k}\ + %{mmcu=atmega16*:--pmem-wrap-around=16k}\ + %{mmcu=atmega32*|\ + mmcu=at90can32*:--pmem-wrap-around=32k}\ + %{mmcu=atmega64*|\ + mmcu=at90can64*|\ + mmcu=at90usb64*:--pmem-wrap-around=64k}}}\ +%:device_to_ld(%{mmcu=*:%*})\ +%:device_to_data_start(%{mmcu=*:%*})\ +%{shared:%eshared is not supported}" + +#define LIB_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lc }}}}}" + +#define LIBSTDCXX "gcc" +/* No libstdc++ for now. Empty string doesn't work. */ + +#define LIBGCC_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc }}}}}" + +#define STARTFILE_SPEC "%:device_to_startfile(%{mmcu=*:%*})" + +#define ENDFILE_SPEC "" + +/* This is the default without any -mmcu=* option (AT90S*). */ +#define MULTILIB_DEFAULTS { "mmcu=avr2" } + +#define TEST_HARD_REG_CLASS(CLASS, REGNO) \ + TEST_HARD_REG_BIT (reg_class_contents[ (int) (CLASS)], REGNO) + +#define CR_TAB "\n\t" + +#define DWARF2_ADDR_SIZE 4 + +#define INCOMING_RETURN_ADDR_RTX avr_incoming_return_addr_rtx () +#define INCOMING_FRAME_SP_OFFSET (AVR_3_BYTE_PC ? 3 : 2) + +/* The caller's stack pointer value immediately before the call + is one byte below the first argument. */ +#define ARG_POINTER_CFA_OFFSET(FNDECL) -1 + +#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ + avr_hard_regno_rename_ok (OLD_REG, NEW_REG) + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* 'true' - if current function is a naked function. */ + int is_naked; + + /* 'true' - if current function is an interrupt function + as specified by the "interrupt" attribute. */ + int is_interrupt; + + /* 'true' - if current function is a signal function + as specified by the "signal" attribute. */ + int is_signal; + + /* 'true' - if current function is a 'task' function + as specified by the "OS_task" attribute. */ + int is_OS_task; + + /* 'true' - if current function is a 'main' function + as specified by the "OS_main" attribute. */ + int is_OS_main; + + /* Current function stack size. 
*/ + int stack_usage; + + /* 'true' if a callee might be tail called */ + int sibcall_fails; + + /* 'true' if the above is_foo predicates are sanity-checked to avoid + multiple diagnose for the same function. */ + int attributes_checked_p; +}; + +/* AVR does not round pushes, but the existence of this macro is + required in order for pushes to be generated. */ +#define PUSH_ROUNDING(X) (X) + +/* Define prototype here to avoid build warning. Some files using + ACCUMULATE_OUTGOING_ARGS (directly or indirectly) include + tm.h but not tm_p.h. */ +extern int avr_accumulate_outgoing_args (void); +#define ACCUMULATE_OUTGOING_ARGS avr_accumulate_outgoing_args() + +#define INIT_EXPANDERS avr_init_expanders() diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md new file mode 100644 index 000000000..f2d8605cd --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -0,0 +1,6358 @@ +;; Machine description for GNU compiler, +;; for ATMEL AVR micro controllers. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Denis Chertykov (chertykov@gmail.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Special characters after '%': +;; A No effect (add 0). +;; B Add 1 to REG number, MEM address or CONST_INT. +;; C Add 2. +;; D Add 3. +;; j Branch condition. +;; k Reverse branch condition. +;;..m..Constant Direct Data memory address. +;; i Print the SFR address quivalent of a CONST_INT or a CONST_INT +;; RAM address. The resulting address is suitable to be used in IN/OUT. +;; o Displacement for (mem (plus (reg) (const_int))) operands. +;; p POST_INC or PRE_DEC address as a pointer (X, Y, Z) +;; r POST_INC or PRE_DEC address as a register (r26, r28, r30) +;; r Print a REG without the register prefix 'r'. +;; T/T Print operand suitable for BLD/BST instruction, i.e. register and +;; bit number. This gets 2 operands: The first %T gets a REG_P and +;; just cashes the operand for the next %T. The second %T gets +;; a CONST_INT that represents a bit position. +;; Example: With %0 = (reg:HI 18) and %1 = (const_int 13) +;; "%T0%T1" it will print "r19,5". +;; Notice that you must not write a comma between %T0 and %T1. +;; T/t Similar to above, but don't print the comma and the bit number. +;; Example: With %0 = (reg:HI 18) and %1 = (const_int 13) +;; "%T0%t1" it will print "r19". +;;..x..Constant Direct Program memory address. +;; ~ Output 'r' if not AVR_HAVE_JMP_CALL. +;; ! Output 'e' if AVR_HAVE_EIJMP_EICALL. 
+ + +(define_constants + [(REG_X 26) + (REG_Y 28) + (REG_Z 30) + (REG_W 24) + (REG_SP 32) + (LPM_REGNO 0) ; implicit target register of LPM + (TMP_REGNO 0) ; temporary register r0 + (ZERO_REGNO 1) ; zero register r1 + ]) + +(define_c_enum "unspec" + [UNSPEC_STRLEN + UNSPEC_MOVMEM + UNSPEC_INDEX_JMP + UNSPEC_FMUL + UNSPEC_FMULS + UNSPEC_FMULSU + UNSPEC_COPYSIGN + UNSPEC_IDENTITY + UNSPEC_INSERT_BITS + UNSPEC_ROUND + ]) + +(define_c_enum "unspecv" + [UNSPECV_PROLOGUE_SAVES + UNSPECV_EPILOGUE_RESTORES + UNSPECV_WRITE_SP + UNSPECV_GOTO_RECEIVER + UNSPECV_ENABLE_IRQS + UNSPECV_MEMORY_BARRIER + UNSPECV_NOP + UNSPECV_SLEEP + UNSPECV_WDR + UNSPECV_DELAY_CYCLES + ]) + + +(include "predicates.md") +(include "constraints.md") + +;; Condition code settings. +(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber, + plus,ldi" + (const_string "none")) + +(define_attr "type" "branch,branch1,arith,xcall" + (const_string "arith")) + +;; The size of instructions in bytes. +;; XXX may depend from "cc" + +(define_attr "length" "" + (cond [(eq_attr "type" "branch") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -63)) + (le (minus (pc) (match_dup 0)) + (const_int 62))) + (const_int 1) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2045)) + (le (minus (pc) (match_dup 0)) + (const_int 2045))) + (const_int 2) + (const_int 3))) + (eq_attr "type" "branch1") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -62)) + (le (minus (pc) (match_dup 0)) + (const_int 61))) + (const_int 2) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2044)) + (le (minus (pc) (match_dup 0)) + (const_int 2043))) + (const_int 3) + (const_int 4))) + (eq_attr "type" "xcall") + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 1) + (const_int 2))] + (const_int 2))) + +;; Lengths of several insns are adjusted in avr.c:adjust_insn_length(). +;; Following insn attribute tells if and how the adjustment has to be +;; done: +;; no No adjustment needed; attribute "length" is fine. +;; Otherwise do special processing depending on the attribute. 
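The "length" attribute further up encodes the reach of AVR relative branches; a rough C restatement of its "branch" case (illustration only, ranges copied from the attribute, distances in words as used there):

    /* Words needed for a "branch"-type insn given the pc-relative
       distance: a short conditional branch, or (typically) an inverted
       branch around an RJMP, or around a 2-word JMP.  */
    static int
    branch_length_words (int delta)
    {
      if (delta >= -63 && delta <= 62)
        return 1;
      if (delta >= -2045 && delta <= 2045)
        return 2;
      return 3;
    }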
+ +(define_attr "adjust_len" + "out_bitop, plus, addto_sp, + tsthi, tstpsi, tstsi, compare, compare64, call, + mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, + ufract, sfract, round, + xload, lpm, movmem, + ashlqi, ashrqi, lshrqi, + ashlhi, ashrhi, lshrhi, + ashlsi, ashrsi, lshrsi, + ashlpsi, ashrpsi, lshrpsi, + insert_bits, + no" + (const_string "no")) + +;; Flavours of instruction set architecture (ISA), used in enabled attribute + +;; mov : ISA has no MOVW movw : ISA has MOVW +;; rjmp : ISA has no CALL/JMP jmp : ISA has CALL/JMP +;; ijmp : ISA has no EICALL/EIJMP eijmp : ISA has EICALL/EIJMP +;; lpm : ISA has no LPMX lpmx : ISA has LPMX +;; elpm : ISA has ELPM but no ELPMX elpmx : ISA has ELPMX +;; no_xmega: non-XMEGA core xmega : XMEGA core + +(define_attr "isa" + "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega, + standard" + (const_string "standard")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "standard") + (const_int 1) + + (and (eq_attr "isa" "mov") + (match_test "!AVR_HAVE_MOVW")) + (const_int 1) + + (and (eq_attr "isa" "movw") + (match_test "AVR_HAVE_MOVW")) + (const_int 1) + + (and (eq_attr "isa" "rjmp") + (match_test "!AVR_HAVE_JMP_CALL")) + (const_int 1) + + (and (eq_attr "isa" "jmp") + (match_test "AVR_HAVE_JMP_CALL")) + (const_int 1) + + (and (eq_attr "isa" "ijmp") + (match_test "!AVR_HAVE_EIJMP_EICALL")) + (const_int 1) + + (and (eq_attr "isa" "eijmp") + (match_test "AVR_HAVE_EIJMP_EICALL")) + (const_int 1) + + (and (eq_attr "isa" "lpm") + (match_test "!AVR_HAVE_LPMX")) + (const_int 1) + + (and (eq_attr "isa" "lpmx") + (match_test "AVR_HAVE_LPMX")) + (const_int 1) + + (and (eq_attr "isa" "elpm") + (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX")) + (const_int 1) + + (and (eq_attr "isa" "elpmx") + (match_test "AVR_HAVE_ELPMX")) + (const_int 1) + + (and (eq_attr "isa" "xmega") + (match_test "AVR_XMEGA")) + (const_int 1) + + (and (eq_attr "isa" "no_xmega") + (match_test "!AVR_XMEGA")) + (const_int 1) + ] (const_int 0))) + + +;; Define mode iterators +(define_mode_iterator QIHI [QI HI]) +(define_mode_iterator QIHI2 [QI HI]) +(define_mode_iterator QISI [QI HI PSI SI]) +(define_mode_iterator QIDI [QI HI PSI SI DI]) +(define_mode_iterator HISI [HI PSI SI]) + +(define_mode_iterator ALL1 [QI QQ UQQ]) +(define_mode_iterator ALL2 [HI HQ UHQ HA UHA]) +(define_mode_iterator ALL4 [SI SQ USQ SA USA]) + +;; All supported move-modes +(define_mode_iterator MOVMODE [QI QQ UQQ + HI HQ UHQ HA UHA + SI SQ USQ SA USA + SF PSI]) + +;; Supported ordered modes that are 2, 3, 4 bytes wide +(define_mode_iterator ORDERED234 [HI SI PSI + HQ UHQ HA UHA + SQ USQ SA USA]) + +;; Define code iterators +;; Define two incarnations so that we can build the cross product. +(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_iterator any_extend2 [sign_extend zero_extend]) + +(define_code_iterator xior [xor ior]) +(define_code_iterator eqne [eq ne]) + +(define_code_iterator ss_addsub [ss_plus ss_minus]) +(define_code_iterator us_addsub [us_plus us_minus]) +(define_code_iterator ss_abs_neg [ss_abs ss_neg]) + +;; Define code attributes +(define_code_attr extend_su + [(sign_extend "s") + (zero_extend "u")]) + +(define_code_attr extend_u + [(sign_extend "") + (zero_extend "u")]) + +(define_code_attr extend_s + [(sign_extend "s") + (zero_extend "")]) + +;; Constrain input operand of widening multiply, i.e. MUL resp. MULS. 
+(define_code_attr mul_r_d + [(zero_extend "r") + (sign_extend "d")]) + +(define_code_attr abelian + [(ss_minus "") (us_minus "") + (ss_plus "%") (us_plus "%")]) + +;; Map RTX code to its standard insn name +(define_code_attr code_stdname + [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") + (ior "ior") + (xor "xor") + (rotate "rotl") + (ss_plus "ssadd") (ss_minus "sssub") (ss_neg "ssneg") (ss_abs "ssabs") + (us_plus "usadd") (us_minus "ussub") (us_neg "usneg") + ]) + +;;======================================================================== +;; The following is used by nonlocal_goto and setjmp. +;; The receiver pattern will create no instructions since internally +;; virtual_stack_vars = hard_frame_pointer + 1 so the RTL become R28=R28 +;; This avoids creating add/sub offsets in frame_pointer save/resore. +;; The 'null' receiver also avoids problems with optimisation +;; not recognising incoming jmp and removing code that resets frame_pointer. +;; The code derived from builtins.c. + +(define_expand "nonlocal_goto_receiver" + [(set (reg:HI REG_Y) + (unspec_volatile:HI [(const_int 0)] UNSPECV_GOTO_RECEIVER))] + "" + { + emit_move_insn (virtual_stack_vars_rtx, + gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, + Pmode))); + /* ; This might change the hard frame pointer in ways that aren't + ; apparent to early optimization passes, so force a clobber. */ + emit_clobber (hard_frame_pointer_rtx); + DONE; + }) + + +;; Defining nonlocal_goto_receiver means we must also define this. +;; even though its function is identical to that in builtins.c + +(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand")) + (use (match_operand 1 "general_operand")) + (use (match_operand 2 "general_operand")) + (use (match_operand 3 "general_operand"))] + "" + { + rtx r_label = copy_to_reg (operands[1]); + rtx r_fp = operands[3]; + rtx r_sp = operands[2]; + + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); + + emit_move_insn (hard_frame_pointer_rtx, r_fp); + emit_stack_restore (SAVE_NONLOCAL, r_sp); + + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + emit_indirect_jump (r_label); + + DONE; + }) + +;; "pushqi1" +;; "pushqq1" "pushuqq1" +(define_insn "push1" + [(set (mem:ALL1 (post_dec:HI (reg:HI REG_SP))) + (match_operand:ALL1 0 "reg_or_0_operand" "r,Y00"))] + "" + "@ + push %0 + push __zero_reg__" + [(set_attr "length" "1,1")]) + +;; All modes for a multi-byte push. We must include complex modes here too, +;; lest emit_single_push_insn "helpfully" create the auto-inc itself. +(define_mode_iterator MPUSH + [CQI + HI CHI HA UHA HQ UHQ + SI CSI SA USA SQ USQ + DI CDI DA UDA DQ UDQ + TA UTA + SF SC + PSI]) + +(define_expand "push1" + [(match_operand:MPUSH 0 "" "")] + "" + { + int i; + for (i = GET_MODE_SIZE (mode) - 1; i >= 0; --i) + { + rtx part = simplify_gen_subreg (QImode, operands[0], mode, i); + if (part != const0_rtx) + part = force_reg (QImode, part); + emit_insn (gen_pushqi1 (part)); + } + DONE; + }) + +;; Notice a special-case when adding N to SP where N results in a +;; zero REG_ARGS_SIZE. This is equivalent to a move from FP. 
+(define_split + [(set (reg:HI REG_SP) + (match_operand:HI 0 "register_operand" ""))] + "reload_completed + && frame_pointer_needed + && !cfun->calls_alloca + && find_reg_note (insn, REG_ARGS_SIZE, const0_rtx)" + [(set (reg:HI REG_SP) + (reg:HI REG_Y))]) + +;;======================================================================== +;; Move stuff around + +;; Secondary input reload from non-generic 16-bit address spaces +(define_insn "reload_in" + [(set (match_operand:MOVMODE 0 "register_operand" "=r") + (match_operand:MOVMODE 1 "flash_operand" "m")) + (clobber (match_operand:QI 2 "d_register_operand" "=d"))] + ;; Fixme: The insn condition must not test the address space. + ;; Because the gen tools refuse to generate insns for address spaces + ;; and will generate insn-codes.h to look like: + ;; #define CODE_FOR_reload_inhi CODE_FOR_nothing + "reload_completed || reload_in_progress" + { + return avr_out_lpm (insn, operands, NULL); + } + [(set_attr "adjust_len" "lpm") + (set_attr "cc" "clobber")]) + + +;; "loadqi_libgcc" +;; "loadhi_libgcc" +;; "loadpsi_libgcc" +;; "loadsi_libgcc" +;; "loadsf_libgcc" +(define_expand "load_libgcc" + [(set (match_dup 3) + (match_dup 2)) + (set (reg:MOVMODE 22) + (match_operand:MOVMODE 1 "memory_operand" "")) + (set (match_operand:MOVMODE 0 "register_operand" "") + (reg:MOVMODE 22))] + "avr_load_libgcc_p (operands[1])" + { + operands[3] = gen_rtx_REG (HImode, REG_Z); + operands[2] = force_operand (XEXP (operands[1], 0), NULL_RTX); + operands[1] = replace_equiv_address (operands[1], operands[3]); + set_mem_addr_space (operands[1], ADDR_SPACE_FLASH); + }) + +;; "load_qi_libgcc" +;; "load_hi_libgcc" +;; "load_psi_libgcc" +;; "load_si_libgcc" +;; "load_sf_libgcc" +(define_insn "load__libgcc" + [(set (reg:MOVMODE 22) + (match_operand:MOVMODE 0 "memory_operand" "m,m"))] + "avr_load_libgcc_p (operands[0]) + && REG_P (XEXP (operands[0], 0)) + && REG_Z == REGNO (XEXP (operands[0], 0))" + { + operands[0] = GEN_INT (GET_MODE_SIZE (mode)); + return "%~call __load_%0"; + } + [(set_attr "length" "1,2") + (set_attr "isa" "rjmp,jmp") + (set_attr "cc" "clobber")]) + + +;; "xload8qi_A" +;; "xload8qq_A" "xload8uqq_A" +(define_insn_and_split "xload8_A" + [(set (match_operand:ALL1 0 "register_operand" "=r") + (match_operand:ALL1 1 "memory_operand" "m")) + (clobber (reg:HI REG_Z))] + "can_create_pseudo_p() + && !avr_xload_libgcc_p (mode) + && avr_mem_memx_p (operands[1]) + && REG_P (XEXP (operands[1], 0))" + { gcc_unreachable(); } + "&& 1" + [(clobber (const_int 0))] + { + /* ; Split away the high part of the address. GCC's register allocator + ; in not able to allocate segment registers and reload the resulting + ; expressions. Notice that no address register can hold a PSImode. 
*/ + + rtx insn, addr = XEXP (operands[1], 0); + rtx hi8 = gen_reg_rtx (QImode); + rtx reg_z = gen_rtx_REG (HImode, REG_Z); + + emit_move_insn (reg_z, simplify_gen_subreg (HImode, addr, PSImode, 0)); + emit_move_insn (hi8, simplify_gen_subreg (QImode, addr, PSImode, 2)); + + insn = emit_insn (gen_xload_8 (operands[0], hi8)); + set_mem_addr_space (SET_SRC (single_set (insn)), + MEM_ADDR_SPACE (operands[1])); + DONE; + }) + +;; "xloadqi_A" "xloadqq_A" "xloaduqq_A" +;; "xloadhi_A" "xloadhq_A" "xloaduhq_A" "xloadha_A" "xloaduha_A" +;; "xloadsi_A" "xloadsq_A" "xloadusq_A" "xloadsa_A" "xloadusa_A" +;; "xloadpsi_A" +;; "xloadsf_A" +(define_insn_and_split "xload_A" + [(set (match_operand:MOVMODE 0 "register_operand" "=r") + (match_operand:MOVMODE 1 "memory_operand" "m")) + (clobber (reg:MOVMODE 22)) + (clobber (reg:QI 21)) + (clobber (reg:HI REG_Z))] + "can_create_pseudo_p() + && avr_mem_memx_p (operands[1]) + && REG_P (XEXP (operands[1], 0))" + { gcc_unreachable(); } + "&& 1" + [(clobber (const_int 0))] + { + rtx addr = XEXP (operands[1], 0); + rtx reg_z = gen_rtx_REG (HImode, REG_Z); + rtx addr_hi8 = simplify_gen_subreg (QImode, addr, PSImode, 2); + addr_space_t as = MEM_ADDR_SPACE (operands[1]); + rtx insn; + + /* Split the address to R21:Z */ + emit_move_insn (reg_z, simplify_gen_subreg (HImode, addr, PSImode, 0)); + emit_move_insn (gen_rtx_REG (QImode, 21), addr_hi8); + + /* Load with code from libgcc */ + insn = emit_insn (gen_xload__libgcc ()); + set_mem_addr_space (SET_SRC (single_set (insn)), as); + + /* Move to destination */ + emit_move_insn (operands[0], gen_rtx_REG (mode, 22)); + + DONE; + }) + +;; Move value from address space memx to a register +;; These insns must be prior to respective generic move insn. + +;; "xloadqi_8" +;; "xloadqq_8" "xloaduqq_8" +(define_insn "xload_8" + [(set (match_operand:ALL1 0 "register_operand" "=&r,r") + (mem:ALL1 (lo_sum:PSI (match_operand:QI 1 "register_operand" "r,r") + (reg:HI REG_Z))))] + "!avr_xload_libgcc_p (mode)" + { + return avr_out_xload (insn, operands, NULL); + } + [(set_attr "length" "4,4") + (set_attr "adjust_len" "*,xload") + (set_attr "isa" "lpmx,lpm") + (set_attr "cc" "none")]) + +;; R21:Z : 24-bit source address +;; R22 : 1-4 byte output + +;; "xload_qi_libgcc" "xload_qq_libgcc" "xload_uqq_libgcc" +;; "xload_hi_libgcc" "xload_hq_libgcc" "xload_uhq_libgcc" "xload_ha_libgcc" "xload_uha_libgcc" +;; "xload_si_libgcc" "xload_sq_libgcc" "xload_usq_libgcc" "xload_sa_libgcc" "xload_usa_libgcc" +;; "xload_sf_libgcc" +;; "xload_psi_libgcc" +(define_insn "xload__libgcc" + [(set (reg:MOVMODE 22) + (mem:MOVMODE (lo_sum:PSI (reg:QI 21) + (reg:HI REG_Z)))) + (clobber (reg:QI 21)) + (clobber (reg:HI REG_Z))] + "avr_xload_libgcc_p (mode)" + { + rtx x_bytes = GEN_INT (GET_MODE_SIZE (mode)); + + output_asm_insn ("%~call __xload_%0", &x_bytes); + return ""; + } + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; General move expanders + +;; "movqi" "movqq" "movuqq" +;; "movhi" "movhq" "movuhq" "movha" "movuha" +;; "movsi" "movsq" "movusq" "movsa" "movusa" +;; "movsf" +;; "movpsi" +(define_expand "mov" + [(set (match_operand:MOVMODE 0 "nonimmediate_operand" "") + (match_operand:MOVMODE 1 "general_operand" ""))] + "" + { + rtx dest = operands[0]; + rtx src = operands[1]; + + if (avr_mem_flash_p (dest)) + DONE; + + /* One of the operands has to be in a register. 
*/ + if (!register_operand (dest, mode) + && !reg_or_0_operand (src, mode)) + { + operands[1] = src = copy_to_mode_reg (mode, src); + } + + if (avr_mem_memx_p (src)) + { + rtx addr = XEXP (src, 0); + + if (!REG_P (addr)) + src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr)); + + if (!avr_xload_libgcc_p (mode)) + /* ; No here because gen_xload8_A only iterates over ALL1. + ; insn-emit does not depend on the mode, it's all about operands. */ + emit_insn (gen_xload8qi_A (dest, src)); + else + emit_insn (gen_xload_A (dest, src)); + + DONE; + } + + if (avr_load_libgcc_p (src)) + { + /* For the small devices, do loads per libgcc call. */ + emit_insn (gen_load_libgcc (dest, src)); + DONE; + } + }) + +;;======================================================================== +;; move byte +;; The last alternative (any immediate constant to any register) is +;; very expensive. It should be optimized by peephole2 if a scratch +;; register is available, but then that register could just as well be +;; allocated for the variable we are loading. But, most of NO_LD_REGS +;; are call-saved registers, and most of LD_REGS are call-used registers, +;; so this may still be a win for registers live across function calls. + +;; "movqi_insn" +;; "movqq_insn" "movuqq_insn" +(define_insn "mov_insn" + [(set (match_operand:ALL1 0 "nonimmediate_operand" "=r ,d ,Qm ,r ,q,r,*r") + (match_operand:ALL1 1 "nox_general_operand" "r Y00,n Ynn,r Y00,Qm,r,q,i"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movqi (insn, operands, NULL); + } + [(set_attr "length" "1,1,5,5,1,1,4") + (set_attr "adjust_len" "mov8") + (set_attr "cc" "ldi,none,clobber,clobber,none,none,clobber")]) + +;; This is used in peephole2 to optimize loading immediate constants +;; if a scratch register from LD_REGS happens to be available. + +;; "*reload_inqi" +;; "*reload_inqq" "*reload_inuqq" +(define_insn "*reload_in" + [(set (match_operand:ALL1 0 "register_operand" "=l") + (match_operand:ALL1 1 "const_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + "ldi %2,lo8(%1) + mov %0,%2" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:ALL1 0 "l_register_operand" "") + (match_operand:ALL1 1 "const_operand" ""))] + ; No need for a clobber reg for 0x0, 0x01 or 0xff + "!satisfies_constraint_Y00 (operands[1]) + && !satisfies_constraint_Y01 (operands[1]) + && !satisfies_constraint_Ym1 (operands[1])" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;;============================================================================ +;; move word (16 bit) + +;; Move register $1 to the Stack Pointer register SP. +;; This insn is emit during function prologue/epilogue generation. 
+;; $2 = 0: We know that IRQs are off +;; $2 = 1: We know that IRQs are on +;; $2 = 2: SP has 8 bits only, IRQ state does not matter +;; $2 = -1: We don't know anything about IRQ on/off +;; Always write SP via unspec, see PR50063 + +(define_insn "movhi_sp_r" + [(set (match_operand:HI 0 "stack_register_operand" "=q,q,q,q,q") + (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r,r,r,r,r") + (match_operand:HI 2 "const_int_operand" "L,P,N,K,LPN")] + UNSPECV_WRITE_SP))] + "" + "@ + out %B0,%B1\;out %A0,%A1 + cli\;out %B0,%B1\;sei\;out %A0,%A1 + in __tmp_reg__,__SREG__\;cli\;out %B0,%B1\;out __SREG__,__tmp_reg__\;out %A0,%A1 + out %A0,%A1 + out %A0,%A1\;out %B0,%B1" + [(set_attr "length" "2,4,5,1,2") + (set_attr "isa" "no_xmega,no_xmega,no_xmega,*,xmega") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:ALL2 0 "l_register_operand" "") + (match_operand:ALL2 1 "const_or_immediate_operand" ""))] + "operands[1] != CONST0_RTX (mode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation, only in above peephole +;; "*reload_inhi" +;; "*reload_inhq" "*reload_inuhq" +;; "*reload_inha" "*reload_inuha" +(define_insn "*reload_in" + [(set (match_operand:ALL2 0 "l_register_operand" "=l") + (match_operand:ALL2 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_inhi (operands, operands[2], NULL); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "reload_in16") + (set_attr "cc" "clobber")]) + +;; "*movhi" +;; "*movhq" "*movuhq" +;; "*movha" "*movuha" +(define_insn "*mov" + [(set (match_operand:ALL2 0 "nonimmediate_operand" "=r,r ,r,m ,d,*r,q,r") + (match_operand:ALL2 1 "nox_general_operand" "r,Y00,m,r Y00,i,i ,r,q"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movhi (insn, operands, NULL); + } + [(set_attr "length" "2,2,6,7,2,6,5,2") + (set_attr "adjust_len" "mov16") + (set_attr "cc" "none,none,clobber,clobber,none,clobber,none,none")]) + +(define_peephole2 ; movw + [(set (match_operand:ALL1 0 "even_register_operand" "") + (match_operand:ALL1 1 "even_register_operand" "")) + (set (match_operand:ALL1 2 "odd_register_operand" "") + (match_operand:ALL1 3 "odd_register_operand" ""))] + "AVR_HAVE_MOVW + && REGNO (operands[0]) == REGNO (operands[2]) - 1 + && REGNO (operands[1]) == REGNO (operands[3]) - 1" + [(set (match_dup 4) + (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[1])); + }) + +(define_peephole2 ; movw_r + [(set (match_operand:ALL1 0 "odd_register_operand" "") + (match_operand:ALL1 1 "odd_register_operand" "")) + (set (match_operand:ALL1 2 "even_register_operand" "") + (match_operand:ALL1 3 "even_register_operand" ""))] + "AVR_HAVE_MOVW + && REGNO (operands[2]) == REGNO (operands[0]) - 1 + && REGNO (operands[3]) == REGNO (operands[1]) - 1" + [(set (match_dup 4) + (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[2])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[3])); + }) + +;; For LPM loads from AS1 we split +;; R = *Z +;; to +;; R = *Z++ +;; Z = Z - sizeof (R) +;; +;; so that the second instruction can be optimized out. 
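+;;
+;; A rough illustration (not part of the original sources; names made up):
+;; a read through a pointer into the __flash address space is the kind of
+;; source that ends up as such an LPM load through Z.
+;;
+;;     int read_flash (const __flash int *p)
+;;     {
+;;         return *p;    /* R = *Z, split into R = *Z++ and Z -= sizeof (R) */
+;;     }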
+ +(define_split ; "split-lpmx" + [(set (match_operand:HISI 0 "register_operand" "") + (match_operand:HISI 1 "memory_operand" ""))] + "reload_completed + && AVR_HAVE_LPMX" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 3) + (plus:HI (match_dup 3) + (match_dup 4)))] + { + rtx addr = XEXP (operands[1], 0); + + if (!avr_mem_flash_p (operands[1]) + || !REG_P (addr) + || reg_overlap_mentioned_p (addr, operands[0])) + { + FAIL; + } + + operands[2] = replace_equiv_address (operands[1], + gen_rtx_POST_INC (Pmode, addr)); + operands[3] = addr; + operands[4] = gen_int_mode (-GET_MODE_SIZE (mode), HImode); + }) + +;;========================================================================== +;; xpointer move (24 bit) + +(define_peephole2 ; *reload_inpsi + [(match_scratch:QI 2 "d") + (set (match_operand:PSI 0 "l_register_operand" "") + (match_operand:PSI 1 "immediate_operand" "")) + (match_dup 2)] + "operands[1] != const0_rtx + && operands[1] != constm1_rtx" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. +(define_insn "*reload_inpsi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (match_operand:PSI 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return avr_out_reload_inpsi (operands, operands[2], NULL); + } + [(set_attr "length" "6") + (set_attr "adjust_len" "reload_in24") + (set_attr "cc" "clobber")]) + +(define_insn "*movpsi" + [(set (match_operand:PSI 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r") + (match_operand:PSI 1 "nox_general_operand" "r,L,Qm,rL,i ,i"))] + "register_operand (operands[0], PSImode) + || register_operand (operands[1], PSImode) + || const0_rtx == operands[1]" + { + return avr_out_movpsi (insn, operands, NULL); + } + [(set_attr "length" "3,3,8,9,4,10") + (set_attr "adjust_len" "mov24") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +;;========================================================================== +;; move double word (32 bit) + +(define_peephole2 ; *reload_insi + [(match_scratch:QI 2 "d") + (set (match_operand:ALL4 0 "l_register_operand" "") + (match_operand:ALL4 1 "immediate_operand" "")) + (match_dup 2)] + "operands[1] != CONST0_RTX (mode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. 
+;; "*reload_insi" +;; "*reload_insq" "*reload_inusq" +;; "*reload_insa" "*reload_inusa" +(define_insn "*reload_insi" + [(set (match_operand:ALL4 0 "register_operand" "=r") + (match_operand:ALL4 1 "immediate_operand" "n Ynn")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_insisf (operands, operands[2], NULL); + } + [(set_attr "length" "8") + (set_attr "adjust_len" "reload_in32") + (set_attr "cc" "clobber")]) + + +;; "*movsi" +;; "*movsq" "*movusq" +;; "*movsa" "*movusa" +(define_insn "*mov" + [(set (match_operand:ALL4 0 "nonimmediate_operand" "=r,r ,r ,Qm ,!d,r") + (match_operand:ALL4 1 "nox_general_operand" "r,Y00,Qm,r Y00,i ,i"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movsisf (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "adjust_len" "mov32") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +;; fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +;; move floating point numbers (32 bit) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r") + (match_operand:SF 1 "nox_general_operand" "r,G,Qm,rG,F ,F"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + { + return output_movsisf (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "adjust_len" "mov32") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +(define_peephole2 ; *reload_insf + [(match_scratch:QI 2 "d") + (set (match_operand:SF 0 "l_register_operand" "") + (match_operand:SF 1 "const_double_operand" "")) + (match_dup 2)] + "operands[1] != CONST0_RTX (SFmode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. 
+(define_insn "*reload_insf" + [(set (match_operand:SF 0 "register_operand" "=r") + (match_operand:SF 1 "const_double_operand" "F")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_insisf (operands, operands[2], NULL); + } + [(set_attr "length" "8") + (set_attr "adjust_len" "reload_in32") + (set_attr "cc" "clobber")]) + +;;========================================================================= +;; move string (like memcpy) + +(define_expand "movmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:HI 2 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" ""))])] + "" + { + if (avr_emit_movmemhi (operands)) + DONE; + + FAIL; + }) + +(define_mode_attr MOVMEM_r_d [(QI "r") + (HI "wd")]) + +;; $0 : Address Space +;; $1, $2 : Loop register +;; R30 : source address +;; R26 : destination address + +;; "movmem_qi" +;; "movmem_hi" +(define_insn "movmem_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (reg:HI REG_Z))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] + UNSPEC_MOVMEM) + (use (match_operand:QIHI 1 "register_operand" "")) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI LPM_REGNO)) + (clobber (match_operand:QIHI 2 "register_operand" "=1"))] + "" + { + return avr_out_movmem (insn, operands, NULL); + } + [(set_attr "adjust_len" "movmem") + (set_attr "cc" "clobber")]) + + +;; $0 : Address Space +;; $1 : RAMPZ RAM address +;; R24 : #bytes and loop register +;; R23:Z : 24-bit source address +;; R26 : 16-bit destination address + +;; "movmemx_qi" +;; "movmemx_hi" +(define_insn "movmemx_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (lo_sum:PSI (reg:QI 23) + (reg:HI REG_Z)))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] + UNSPEC_MOVMEM) + (use (reg:QIHI 24)) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI LPM_REGNO)) + (clobber (reg:HI 24)) + (clobber (reg:QI 23)) + (clobber (mem:QI (match_operand:QI 1 "io_address_operand" "n")))] + "" + "%~call __movmemx_" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 +;; memset (%0, %2, %1) + +(define_expand "setmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand 2 "const_int_operand" "")) + (use (match_operand:HI 1 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" "")) + (clobber (match_scratch:HI 4 "")) + (clobber (match_dup 5))])] + "" + { + rtx addr0; + enum machine_mode mode; + + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (!CONST_INT_P (operands[1])) + FAIL; + + mode = u8_operand (operands[1], VOIDmode) ? 
QImode : HImode; + operands[5] = gen_rtx_SCRATCH (mode); + operands[1] = copy_to_mode_reg (mode, + gen_int_mode (INTVAL (operands[1]), mode)); + addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + operands[0] = gen_rtx_MEM (BLKmode, addr0); + }) + + +(define_insn "*clrmemqi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e")) + (const_int 0)) + (use (match_operand:QI 1 "register_operand" "r")) + (use (match_operand:QI 2 "const_int_operand" "n")) + (clobber (match_scratch:HI 3 "=0")) + (clobber (match_scratch:QI 4 "=&1"))] + "" + "0:\;st %a0+,__zero_reg__\;dec %1\;brne 0b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + + +(define_insn "*clrmemhi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e,e")) + (const_int 0)) + (use (match_operand:HI 1 "register_operand" "!w,d")) + (use (match_operand:HI 2 "const_int_operand" "n,n")) + (clobber (match_scratch:HI 3 "=0,0")) + (clobber (match_scratch:HI 4 "=&1,&1"))] + "" + "@ + 0:\;st %a0+,__zero_reg__\;sbiw %A1,1\;brne 0b + 0:\;st %a0+,__zero_reg__\;subi %A1,1\;sbci %B1,0\;brne 0b" + [(set_attr "length" "3,4") + (set_attr "cc" "clobber,clobber")]) + +(define_expand "strlenhi" + [(set (match_dup 4) + (unspec:HI [(match_operand:BLK 1 "memory_operand" "") + (match_operand:QI 2 "const_int_operand" "") + (match_operand:HI 3 "immediate_operand" "")] + UNSPEC_STRLEN)) + (set (match_dup 4) + (plus:HI (match_dup 4) + (const_int -1))) + (parallel [(set (match_operand:HI 0 "register_operand" "") + (minus:HI (match_dup 4) + (match_dup 5))) + (clobber (scratch:QI))])] + "" + { + rtx addr; + if (operands[2] != const0_rtx) + FAIL; + addr = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = gen_rtx_MEM (BLKmode, addr); + operands[5] = addr; + operands[4] = gen_reg_rtx (HImode); + }) + +(define_insn "*strlenhi" + [(set (match_operand:HI 0 "register_operand" "=e") + (unspec:HI [(mem:BLK (match_operand:HI 1 "register_operand" "0")) + (const_int 0) + (match_operand:HI 2 "immediate_operand" "i")] + UNSPEC_STRLEN))] + "" + "0:\;ld __tmp_reg__,%a0+\;tst __tmp_reg__\;brne 0b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +; add bytes + +;; "addqi3" +;; "addqq3" "adduqq3" +(define_insn "add3" + [(set (match_operand:ALL1 0 "register_operand" "=r,d ,r ,r ,r ,r") + (plus:ALL1 (match_operand:ALL1 1 "register_operand" "%0,0 ,0 ,0 ,0 ,0") + (match_operand:ALL1 2 "nonmemory_operand" "r,n Ynn,Y01,Ym1,Y02,Ym2")))] + "" + "@ + add %0,%2 + subi %0,lo8(-(%2)) + inc %0 + dec %0 + inc %0\;inc %0 + dec %0\;dec %0" + [(set_attr "length" "1,1,1,1,2,2") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")]) + +;; "addhi3" +;; "addhq3" "adduhq3" +;; "addha3" "adduha3" +(define_expand "add3" + [(set (match_operand:ALL2 0 "register_operand" "") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = gen_int_mode (INTVAL (operands[2]), HImode); + + if (can_create_pseudo_p() + && !stack_register_operand (operands[0], HImode) + && !stack_register_operand (operands[1], HImode) + && !d_register_operand (operands[0], HImode) + && !d_register_operand (operands[1], HImode)) + { + emit_insn (gen_addhi3_clobber (operands[0], operands[1], operands[2])); + DONE; + } + } + + if (CONST_FIXED_P (operands[2])) + { + emit_insn (gen_add3_clobber (operands[0], operands[1], operands[2])); + DONE; + } + }) + + +(define_insn 
"*addhi3_zero_extend" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + "add %A0,%2\;adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3.sign_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "0")))] + "" + { + return reg_overlap_mentioned_p (operands[0], operands[1]) + ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0" + : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0"; + } + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "*addhi3_sp" + [(set (match_operand:HI 1 "stack_register_operand" "=q") + (plus:HI (match_operand:HI 2 "stack_register_operand" "q") + (match_operand:HI 0 "avr_sp_immediate_operand" "Csp")))] + "" + { + return avr_out_addto_sp (operands, NULL); + } + [(set_attr "length" "6") + (set_attr "adjust_len" "addto_sp")]) + +;; "*addhi3" +;; "*addhq3" "*adduhq3" +;; "*addha3" "*adduha3" +(define_insn "*add3" + [(set (match_operand:ALL2 0 "register_operand" "=??r,d,!w ,d") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0,0,0 ,0") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "r,s,IJ YIJ,n Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "2") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +;; Adding a constant to NO_LD_REGS might have lead to a reload of +;; that constant to LD_REGS. We don't add a scratch to *addhi3 +;; itself because that insn is special to reload. 
+ +(define_peephole2 ; addhi3_clobber + [(set (match_operand:ALL2 0 "d_register_operand" "") + (match_operand:ALL2 1 "const_operand" "")) + (set (match_operand:ALL2 2 "l_register_operand" "") + (plus:ALL2 (match_dup 2) + (match_dup 0)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 2) + (plus:ALL2 (match_dup 2) + (match_dup 1))) + (clobber (match_dup 3))])] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], mode, 0); + }) + +;; Same, but with reload to NO_LD_REGS +;; Combine *reload_inhi with *addhi3 + +(define_peephole2 ; addhi3_clobber + [(parallel [(set (match_operand:ALL2 0 "l_register_operand" "") + (match_operand:ALL2 1 "const_operand" "")) + (clobber (match_operand:QI 2 "d_register_operand" ""))]) + (set (match_operand:ALL2 3 "l_register_operand" "") + (plus:ALL2 (match_dup 3) + (match_dup 0)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 3) + (plus:ALL2 (match_dup 3) + (match_dup 1))) + (clobber (match_dup 2))])]) + +;; "addhi3_clobber" +;; "addhq3_clobber" "adduhq3_clobber" +;; "addha3_clobber" "adduha3_clobber" +(define_insn "add3_clobber" + [(set (match_operand:ALL2 0 "register_operand" "=!w ,d ,r") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0 ,0 ,0") + (match_operand:ALL2 2 "const_operand" "IJ YIJ,n Ynn,n Ynn"))) + (clobber (match_scratch:QI 3 "=X ,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + + +;; "addsi3" +;; "addsq3" "addusq3" +;; "addsa3" "addusa3" +(define_insn "add3" + [(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r") + (plus:ALL4 (match_operand:ALL4 1 "register_operand" "%0,0 ,0") + (match_operand:ALL4 2 "nonmemory_operand" "r,i ,n Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*addpsi3_zero_extend.qi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%A1\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "*addpsi3_zero_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (zero_extend:PSI (match_operand:HI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%A1\;adc %B0,%B1\;adc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "*addpsi3_sign_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (sign_extend:PSI (match_operand:HI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,%B1\;adc %C0,__zero_reg__\;sbrc %B1,7\;dec %C0" + [(set_attr "length" "5") + (set_attr "cc" "set_n")]) + +(define_insn "*addsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "*addsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + 
"add %A0,%1\;adc %B0,%B1\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "addpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d ,d,r") + (plus:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0,0") + (match_operand:PSI 2 "nonmemory_operand" "r,s ,n,n"))) + (clobber (match_scratch:QI 3 "=X,X ,X,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "3") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "subpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (match_operand:PSI 2 "register_operand" "r")))] + "" + "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_zero_extend.qi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:SI 1 "register_operand" "0") + (zero_extend:PSI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_zero_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,%B2\;sbc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_sign_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (sign_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%A2\;sbc %B0,%B2\;sbc %C0,__zero_reg__\;sbrc %B2,7\;inc %C0" + [(set_attr "length" "5") + (set_attr "cc" "set_czn")]) + +;----------------------------------------------------------------------------- +; sub bytes + +;; "subqi3" +;; "subqq3" "subuqq3" +(define_insn "sub3" + [(set (match_operand:ALL1 0 "register_operand" "=??r,d ,r ,r ,r ,r") + (minus:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,0 ,0 ,0 ,0") + (match_operand:ALL1 2 "nonmemory_or_const_operand" "r,n Ynn,Y01,Ym1,Y02,Ym2")))] + "" + "@ + sub %0,%2 + subi %0,lo8(%2) + dec %0 + inc %0 + dec %0\;dec %0 + inc %0\;inc %0" + [(set_attr "length" "1,1,1,1,2,2") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")]) + +;; "subhi3" +;; "subhq3" "subuhq3" +;; "subha3" "subuha3" +(define_insn "sub3" + [(set (match_operand:ALL2 0 "register_operand" "=??r,d ,*r") + (minus:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,0") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "r,i Ynn,Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*subhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_czn")]) + +(define_insn "*subhi3.sign_extend2" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + { + return reg_overlap_mentioned_p (operands[0], operands[2]) + ? 
"mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0" + : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0"; + } + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;; "subsi3" +;; "subsq3" "subusq3" +;; "subsa3" "subusa3" +(define_insn "sub3" + [(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r") + (minus:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,0") + (match_operand:ALL4 2 "nonmemory_or_const_operand" "r,n Ynn,Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*subsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) + +(define_insn "*subsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,%B2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) + +;****************************************************************************** +; mul + +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" "")))] + "" + { + if (!AVR_HAVE_MUL) + { + emit_insn (gen_mulqi3_call (operands[0], operands[1], operands[2])); + DONE; + } + }) + +(define_insn "*mulqi3_enh" + [(set (match_operand:QI 0 "register_operand" "=r") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r0 + clr r1" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulqi3_call" + [(set (reg:QI 24) (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]) + +(define_insn "*mulqi3_call" + [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))] + "!AVR_HAVE_MUL" + "%~call __mulqi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "umulqi3_highpart" +;; "smulqi3_highpart" +(define_insn "mulqi3_highpart" + [(set (match_operand:QI 0 "register_operand" "=r") + (truncate:QI + (lshiftrt:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))) + (const_int 8))))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r1 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + + +;; Used when expanding div or mod inline for some special values +(define_insn "*subqi3.ashiftrt7" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 1 "register_operand" "0") + (ashiftrt:QI (match_operand:QI 2 "register_operand" "r") + (const_int 7))))] + "" + "sbrc %2,7\;inc %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + +(define_insn "*addqi3.lt0" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (lt:QI (match_operand:QI 1 "register_operand" "r") + (const_int 0)) + (match_operand:QI 2 
"register_operand" "0")))] + "" + "sbrc %1,7\;inc %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + +(define_insn "*addhi3.lt0" + [(set (match_operand:HI 0 "register_operand" "=w,r") + (plus:HI (lt:HI (match_operand:QI 1 "register_operand" "r,r") + (const_int 0)) + (match_operand:HI 2 "register_operand" "0,0"))) + (clobber (match_scratch:QI 3 "=X,&1"))] + "" + "@ + sbrc %1,7\;adiw %0,1 + lsl %1\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber")]) + +(define_insn "*addpsi3.lt0" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "r") + (const_int 23)) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "mov __tmp_reg__,%C1\;lsl __tmp_reg__ + adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "*addsi3.lt0" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (const_int 31)) + (match_operand:SI 2 "register_operand" "0")))] + "" + "mov __tmp_reg__,%D1\;lsl __tmp_reg__ + adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +(define_insn "*umulqihi3.call" + [(set (reg:HI 24) + (mult:HI (zero_extend:HI (reg:QI 22)) + (zero_extend:HI (reg:QI 24)))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22))] + "!AVR_HAVE_MUL" + "%~call __umulqihi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "umulqihi3" +;; "mulqihi3" +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))))] + "AVR_HAVE_MUL" + "mul %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "usmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; Above insn is not canonicalized by insn combine, so here is a version with +;; operands swapped. 
+ +(define_insn "*sumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (zero_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; One-extend operand 1 + +(define_insn "*osmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "a")))) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*oumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;****************************************************************************** +; multiply-add/sub QI: $0 = $3 +/- $1*$2 +;****************************************************************************** + +(define_insn "*maddqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (match_operand:QI 3 "register_operand" "0")))] + + "AVR_HAVE_MUL" + "mul %1,%2 + add %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*msubqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%2 + sub %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*maddqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")) + (match_operand:QI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *maddqi4 + (set (match_dup 0) + (plus:QI (mult:QI (match_dup 1) + (match_dup 4)) + (match_dup 3)))]) + +(define_insn_and_split "*msubqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *msubqi4 + (set (match_dup 0) + (minus:QI (match_dup 3) + (mult:QI (match_dup 1) + (match_dup 4))))]) + + +;****************************************************************************** +; multiply-add/sub HI: $0 = $3 +/- $1*$2 with 8-bit values $1, $2 +;****************************************************************************** + +;; We don't use standard insns/expanders as they lead to cumbersome code for, +;; e.g, +;; +;; int foo (unsigned char z) +;; { +;; extern int aInt[]; +;; return aInt[3*z+2]; +;; } +;; +;; because the constant +4 then is added explicitely instead of consuming it +;; with the aInt symbol. 
Therefore, we rely on insn combine which takes costs +;; into account more accurately and doesn't do burte-force multiply-add/sub. +;; The implementational effort is the same so we are fine with that approach. + + +;; "*maddqihi4" +;; "*umaddqihi4" +(define_insn "*maddqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))) + (match_operand:HI 3 "register_operand" "0")))] + + "AVR_HAVE_MUL" + "mul %1,%2 + add %A0,r0 + adc %B0,r1 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*msubqihi4" +;; "*umsubqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" "")))))] + "AVR_HAVE_MUL" + "mul %1,%2 + sub %A0,r0 + sbc %B0,r1 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*usmaddqihi4" +;; "*sumaddqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "a")) + (any_extend2:HI (match_operand:QI 2 "register_operand" "a"))) + (match_operand:HI 3 "register_operand" "0")))] + "AVR_HAVE_MUL + && reload_completed + && != " + { + output_asm_insn ( == SIGN_EXTEND + ? "mulsu %1,%2" : "mulsu %2,%1", operands); + + return "add %A0,r0\;adc %B0,r1\;clr __zero_reg__"; + } + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*usmsubqihi4" +;; "*sumsubqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "a")) + (any_extend2:HI (match_operand:QI 2 "register_operand" "a")))))] + "AVR_HAVE_MUL + && reload_completed + && != " + { + output_asm_insn ( == SIGN_EXTEND + ? "mulsu %1,%2" : "mulsu %2,%1", operands); + + return "sub %A0,r0\;sbc %B0,r1\;clr __zero_reg__"; + } + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; Handle small constants + +;; Special case of a += 2*b as frequently seen with accesses to int arrays. +;; This is shorter, faster than MUL and has lower register pressure. + +(define_insn_and_split "*umaddqihi4.2" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (const_int 2)) + (match_operand:HI 2 "register_operand" "r")))] + "!reload_completed + && !reg_overlap_mentioned_p (operands[0], operands[1])" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 0) + (match_dup 2)) + ; *addhi3_zero_extend + (set (match_dup 0) + (plus:HI (zero_extend:HI (match_dup 1)) + (match_dup 0))) + ; *addhi3_zero_extend + (set (match_dup 0) + (plus:HI (zero_extend:HI (match_dup 1)) + (match_dup 0)))]) + +;; "umaddqihi4.uconst" +;; "maddqihi4.sconst" +(define_insn_and_split "*maddqihi4.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n")) + (match_operand:HI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umaddqihi4 resp. 
*maddqihi4 + (set (match_dup 0) + (plus:HI (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4))) + (match_dup 3)))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; "*umsubqihi4.uconst" +;; "*msubqihi4.sconst" +(define_insn_and_split "*msubqihi4.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umsubqihi4 resp. *msubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT +;; for MULT with power of 2 and skips trying MULT insn above. + +(define_insn_and_split "*umsubqihi4.uconst.ashift" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT +;; for MULT with power of 2 and skips trying MULT insn above. We omit 128 +;; because this would require an extra pattern for just one value. + +(define_insn_and_split "*msubqihi4.sconst.ashift" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_1_to_6_operand" "M")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *smsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;; For signed/unsigned combinations that require narrow constraint "a" +;; just provide a pattern if signed/unsigned combination is actually needed. 
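+;;
+;; Another informal example (names invented): a multiply-add of a signed
+;; char by a constant such as 200, which fits in 8 bits only when taken as
+;; unsigned, is what the two patterns below cover; their !s8_operand
+;; condition excludes constants that the plain signed variants already
+;; handle.
+;;
+;;     int mac200 (signed char c, int acc)
+;;     {
+;;         return acc + c * 200;
+;;     }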
+ +(define_insn_and_split "*sumaddqihi4.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")) + (match_operand:HI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&a"))] + "AVR_HAVE_MUL + && !s8_operand (operands[2], VOIDmode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *sumaddqihi4 + (set (match_dup 0) + (plus:HI (mult:HI (sign_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4))) + (match_dup 3)))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*sumsubqihi4.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")))) + (clobber (match_scratch:QI 4 "=&a"))] + "AVR_HAVE_MUL + && !s8_operand (operands[2], VOIDmode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *sumsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (sign_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign/zero-extend, $2 = small constant +;****************************************************************************** + +;; "*muluqihi3.uconst" +;; "*mulsqihi3.sconst" +(define_insn_and_split "*mulqihi3.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 resp. 
mulqihi3 + (set (match_dup 0) + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*muluqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "s8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*mulsqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*mulsqihi3.oconst" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "o8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; *osmulqihi3 + (set (match_dup 0) + (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 3)))) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; The EXTEND of $1 only appears in combine, we don't see it in expand so that +;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper +;; at that time. Fix that. 
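+;;
+;; An informal example (names made up): shifting a promoted char left by a
+;; small constant is such a case.  Expand emits an ASHIFT of the extended
+;; QImode value, and the patterns below rewrite it as a widening multiply
+;; by a power of 2 when a hardware multiplier is available.
+;;
+;;     int shift5 (signed char c)
+;;     {
+;;         return c << 5;    /* becomes sign_extend (c) * 32 */
+;;     }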
+ +(define_insn "*ashiftqihi2.signx.1" + [(set (match_operand:HI 0 "register_operand" "=r,*r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r")) + (const_int 1)))] + "" + "@ + lsl %A0\;sbc %B0,%B0 + mov %A0,%1\;lsl %A0\;sbc %B0,%B0" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*ashifthi3.signx.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_2_to_6_operand" "I"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + }) + +(define_insn_and_split "*ashifthi3.signx.const7" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (const_int 7))) + (clobber (match_scratch:QI 2 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 2) + (match_dup 3)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 2)) + (sign_extend:HI (match_dup 1))))] + { + operands[3] = gen_int_mode (1 << 7, QImode); + }) + +(define_insn_and_split "*ashifthi3.zerox.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "I"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign-/zero-/one-extend, $2 = reg +;****************************************************************************** + +(define_insn "mulsqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "mulsu %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "muluqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;; one-extend operand 1 + +(define_insn "muloqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + sub %B0,%A2 + clr __zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +;****************************************************************************** + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "register_or_s9_operand" "")))] + "" + { + if (!AVR_HAVE_MUL) + { + if (!register_operand (operands[2], 
HImode)) + operands[2] = force_reg (HImode, operands[2]); + + emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); + DONE; + } + + /* For small constants we can do better by extending them on the fly. + The constant can be loaded in one instruction and the widening + multiplication is shorter. First try the unsigned variant because it + allows constraint "d" instead of "a" for the signed version. */ + + if (s9_operand (operands[2], HImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + + if (u8_operand (operands[2], HImode)) + { + emit_insn (gen_muluqihi3 (operands[0], reg, operands[1])); + } + else if (s8_operand (operands[2], HImode)) + { + emit_insn (gen_mulsqihi3 (operands[0], reg, operands[1])); + } + else + { + emit_insn (gen_muloqihi3 (operands[0], reg, operands[1])); + } + + DONE; + } + + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + }) + +(define_insn "*mulhi3_enh" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + { + return REGNO (operands[1]) == REGNO (operands[2]) + ? "mul %A1,%A1\;movw %0,r0\;mul %A1,%B1\;add %B0,r0\;add %B0,r0\;clr r1" + : "mul %A1,%A2\;movw %0,r0\;mul %A1,%B2\;add %B0,r0\;mul %B1,%A2\;add %B0,r0\;clr r1"; + } + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_expand "mulhi3_call" + [(set (reg:HI 24) (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 22) (match_operand:HI 2 "register_operand" "")) + (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))]) + (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]) + +(define_insn "*mulhi3_call" + [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))] + "!AVR_HAVE_MUL" + "%~call __mulhi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; it's final register footprint is worked out. 
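+;;
+;; A sketch of the typical case (not from the original sources; names
+;; invented): a 32-bit multiply by a constant that fits in 16 bits.  On a
+;; device with a hardware multiplier the expander below defers expansion so
+;; that the cheaper widening multiply (here via muluhisi3) can be picked
+;; instead of a full __mulsi3 call.
+;;
+;;     long scale (long x)
+;;     {
+;;         return x * 1000;    /* u16_operand constant */
+;;     }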
+ +(define_expand "mulsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } + + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) + +(define_insn_and_split "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:SI 18) + (match_dup 1)) + (set (reg:SI 22) + (match_dup 2)) + (parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))]) + (set (match_dup 0) + (reg:SI 22))] + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } + + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) + +;; "muluqisi3" +;; "muluhisi3" +(define_insn_and_split "mulusi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. */ + + if (QImode == mode) + operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + { + operands[1] = force_reg (HImode, operands[1]); + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2])); + DONE; + } + }) + +;; "mulsqisi3" +;; "mulshisi3" +(define_insn_and_split "mulssi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (sign_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode) + || s16_operand (operands[2], SImode)) + { + rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + + operands[1] = force_reg (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1])); + else + emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2)); + + DONE; + } + }) + +;; One-extend operand 1 + +(define_insn_and_split "mulohisi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))]) + +;; "mulhisi3" +;; "umulhisi3" +(define_expand "mulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand" "")) + (any_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL") + +(define_expand "usmulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL") + +;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" +;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" +;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3" +;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3" +(define_insn_and_split + "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 18) + (match_dup 1)) + (set (reg:HI 26) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (match_dup 3) + (match_dup 4))) + (set (match_dup 0) + (reg:SI 22))] + { + rtx xop1 = operands[1]; + rtx xop2 = operands[2]; + + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + xop1 = gen_rtx_fmt_e (, HImode, xop1); + + if (QImode == mode) + xop2 = gen_rtx_fmt_e (, HImode, xop2); + + if ( == + || == ZERO_EXTEND) + { + operands[1] = xop1; + operands[2] = xop2; + operands[3] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 26)); + } + else + { + /* = SIGN_EXTEND */ + /* = ZERO_EXTEND */ + + operands[1] = xop2; + operands[2] = xop1; + operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26)); + } + }) + +;; "smulhi3_highpart" +;; "umulhi3_highpart" +(define_expand "mulhi3_highpart" + [(set (reg:HI 18) + (match_operand:HI 1 "nonmemory_operand" "")) + (set (reg:HI 26) + (match_operand:HI 2 "nonmemory_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "AVR_HAVE_MUL") + + +(define_insn "*mulsi3_call" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))] + "AVR_HAVE_MUL" + "%~call __mulsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*mulhisi3_call" +;; "*umulhisi3_call" +(define_insn "*mulhisi3_call" + [(set (reg:SI 22) + (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*umulhi3_highpart_call" +;; "*smulhi3_highpart_call" +(define_insn "*mulhi3_highpart_call" + [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*usmulhisi3_call" + [(set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 18)) + (sign_extend:SI (reg:HI 26))))] + "AVR_HAVE_MUL" + "%~call __usmulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulhisi3_call" + [(set (reg:SI 22) + (mult:SI (any_extend:SI (reg:HI 26)) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulohisi3_call" + [(set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulohisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % +; divmod + +;; Generate lib1funcs.S calls ourselves, because: +;; - we know exactly which registers are clobbered (for QI and HI +;; modes, some of the call-used registers are preserved) +;; - we get both the quotient and the remainder at no extra cost +;; - we split the patterns only after the first CSE passes because +;; CSE has problems to operate on hard regs. 
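As a rough C model of the point made above: one helper call computes the quotient and the remainder together, so the second result costs nothing extra. The function name and signature below are illustrative only, not from libgcc; the register convention (dividend in R24, divisor in R22, quotient back in R24, remainder in R25, R22/R23 clobbered) is the one encoded by the RTL patterns that follow.

    #include <stdint.h>

    /* Semantics of a combined QImode divmod helper: truncating division as
       in C, with the remainder falling out of the same computation.
       Assumes den != 0.  */
    static void divmodqi4_model (int8_t num, int8_t den,
                                 int8_t *quo, int8_t *rem)
    {
      *quo = num / den;           /* e.g. -7 / 2        == -3 */
      *rem = num - *quo * den;    /* e.g. -7 - (-3) * 2 == -1 */
    }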
+;; +(define_insn_and_split "divmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (div:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this divmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))]) + +(define_insn "*divmodqi4_call" + [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))] + "" + "%~call __divmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this udivmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))]) + +(define_insn "*udivmodqi4_call" + [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))] + "" + "%~call __udivmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "divmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (div:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 26))])] + "" + "this should have been splitted;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))]) + +(define_insn "*divmodhi4_call" + [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __divmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (udiv:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 
26))])] + "" + "this udivmodhi4 pattern should have been splitted.;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))]) + +(define_insn "*udivmodhi4_call" + [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __udivmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 24-bit multiply + +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; it's final register footprint is worked out. + +(define_expand "mulpsi3" + [(parallel [(set (match_operand:PSI 0 "register_operand" "") + (mult:PSI (match_operand:PSI 1 "register_operand" "") + (match_operand:PSI 2 "nonmemory_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + if (s8_operand (operands[2], PSImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); + DONE; + } + }) + +(define_insn "*umulqihipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") + (mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r")) + (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + clr %C0 + add %B0,r0 + adc %C0,r1 + clr __zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn "*umulhiqipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") + (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r")) + (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + add %B0,r0 + mov %C0,r1 + clr __zero_reg__ + adc %C0,__zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "mulsqipsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") + (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:QI 25) + (match_dup 1)) + (set (reg:PSI 22) + (match_dup 2)) + (set (reg:PSI 18) + (mult:PSI (sign_extend:PSI (reg:QI 25)) + (reg:PSI 22))) + (set (match_dup 0) + (reg:PSI 18))]) + +(define_insn_and_split "*mulpsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") + (mult:PSI (match_operand:PSI 1 "pseudo_register_operand" "r") + (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:PSI 18) + (match_dup 1)) + (set (reg:PSI 22) + (match_dup 2)) + (parallel [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:HI 26))]) + (set (match_dup 0) + (reg:PSI 22))] + { + if (s8_operand 
(operands[2], PSImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); + DONE; + } + }) + +(define_insn "*mulsqipsi3.libgcc" + [(set (reg:PSI 18) + (mult:PSI (sign_extend:PSI (reg:QI 25)) + (reg:PSI 22)))] + "AVR_HAVE_MUL" + "%~call __mulsqipsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulpsi3.libgcc" + [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:HI 26))] + "AVR_HAVE_MUL" + "%~call __mulpsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 24-bit signed/unsigned division and modulo. +;; Notice that the libgcc implementation return the quotient in R22 +;; and the remainder in R18 whereas the 32-bit [u]divmodsi4 +;; implementation works the other way round. + +(define_insn_and_split "divmodpsi4" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "") + (match_operand:PSI 2 "pseudo_register_operand" ""))) + (set (match_operand:PSI 3 "pseudo_register_operand" "") + (mod:PSI (match_dup 1) + (match_dup 2))) + (clobber (reg:DI 18)) + (clobber (reg:QI 26))])] + "" + { gcc_unreachable(); } + "" + [(set (reg:PSI 22) (match_dup 1)) + (set (reg:PSI 18) (match_dup 2)) + (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))]) + (set (match_dup 0) (reg:PSI 22)) + (set (match_dup 3) (reg:PSI 18))]) + +(define_insn "*divmodpsi4_call" + [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))] + "" + "%~call __divmodpsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodpsi4" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "") + (match_operand:PSI 2 "pseudo_register_operand" ""))) + (set (match_operand:PSI 3 "pseudo_register_operand" "") + (umod:PSI (match_dup 1) + (match_dup 2))) + (clobber (reg:DI 18)) + (clobber (reg:QI 26))])] + "" + { gcc_unreachable(); } + "" + [(set (reg:PSI 22) (match_dup 1)) + (set (reg:PSI 18) (match_dup 2)) + (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))]) + (set (match_dup 0) (reg:PSI 22)) + (set (match_dup 3) (reg:PSI 18))]) + +(define_insn "*udivmodpsi4_call" + [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))] + "" + "%~call __udivmodpsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_and_split "divmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (div:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber (reg:SI 22)) + 
(clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this divmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))]) + +(define_insn "*divmodsi4_call" + [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __divmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (udiv:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber (reg:SI 22)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this udivmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))]) + +(define_insn "*udivmodsi4_call" + [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __udivmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& +; and + +(define_insn "andqi3" + [(set (match_operand:QI 0 "register_operand" "=??r,d") + (and:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + and %0,%2 + andi %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "andhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %A0,%A2\;and %B0,%B2"; + else if (which_alternative == 1) + return "andi %A0,lo8(%2)\;andi %B0,hi8(%2)"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,2,4,4") + (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,set_n,clobber,clobber,clobber")]) + +(define_insn "andpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r") + (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %A0,%A2" CR_TAB + "and %B0,%B2" CR_TAB + "and %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n"))) 
+ (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %0,%2" CR_TAB + "and %B0,%B2" CR_TAB + "and %C0,%C2" CR_TAB + "and %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_peephole2 ; andi + [(set (match_operand:QI 0 "d_register_operand" "") + (and:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:QI (match_dup 0) + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + { + operands[1] = GEN_INT (INTVAL (operands[1]) & INTVAL (operands[2])); + }) + +;;||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +;; ior + +(define_insn "iorqi3" + [(set (match_operand:QI 0 "register_operand" "=??r,d") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + or %0,%2 + ori %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "iorhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,s,n,Co2,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %A0,%A2\;or %B0,%B2"; + else if (which_alternative == 1) + return "ori %A0,lo8(%2)\;ori %B0,hi8(%2)"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,2,4,4") + (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,set_n,clobber,clobber,clobber")]) + +(define_insn "iorpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r") + (ior:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,n,Co3,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %A0,%A2" CR_TAB + "or %B0,%B2" CR_TAB + "or %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,n,Co4,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %0,%2" CR_TAB + "or %B0,%B2" CR_TAB + "or %C0,%C2" CR_TAB + "or %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +;;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; xor + +(define_insn "xorqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (xor:QI (match_operand:QI 1 "register_operand" "%0") + (match_operand:QI 2 "register_operand" "r")))] + "" + "eor %0,%2" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "xorhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,r ,r") + (xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,Cx2,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + 
return "eor %A0,%A2\;eor %B0,%B2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,4") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +(define_insn "xorpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,r ,r") + (xor:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "eor %A0,%A2" CR_TAB + "eor %B0,%B2" CR_TAB + "eor %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,r ,r") + (xor:SI (match_operand:SI 1 "register_operand" "%0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,Cx4,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "eor %0,%2" CR_TAB + "eor %B0,%B2" CR_TAB + "eor %C0,%C2" CR_TAB + "eor %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +;; swap swap swap swap swap swap swap swap swap swap swap swap swap swap swap +;; swap + +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "register_operand" "") + (rotate:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "const_0_to_7_operand" "")))] + "" + { + if (!CONST_INT_P (operands[2])) + FAIL; + + operands[2] = gen_int_mode (INTVAL (operands[2]) & 7, QImode); + }) + +;; Expander used by __builtin_avr_swap +(define_expand "rotlqi3_4" + [(set (match_operand:QI 0 "register_operand" "") + (rotate:QI (match_operand:QI 1 "register_operand" "") + (const_int 4)))]) + +(define_insn "*rotlqi3" + [(set (match_operand:QI 0 "register_operand" "=r,r,r ,r ,r ,r ,r ,r") + (rotate:QI (match_operand:QI 1 "register_operand" "0,0,0 ,0 ,0 ,0 ,0 ,0") + (match_operand:QI 2 "const_0_to_7_operand" "P,K,C03,C04,C05,C06,C07,L")))] + "" + "@ + lsl %0\;adc %0,__zero_reg__ + lsl %0\;adc %0,__zero_reg__\;lsl %0\;adc %0,__zero_reg__ + swap %0\;bst %0,0\;ror %0\;bld %0,7 + swap %0 + swap %0\;lsl %0\;adc %0,__zero_reg__ + swap %0\;lsl %0\;adc %0,__zero_reg__\;lsl %0\;adc %0,__zero_reg__ + bst %0,0\;ror %0\;bld %0,7 + " + [(set_attr "length" "2,4,4,1,3,5,3,0") + (set_attr "cc" "set_n,set_n,clobber,none,set_n,set_n,clobber,none")]) + +;; Split all rotates of HI,SI and PSImode registers where rotation is by +;; a whole number of bytes. The split creates the appropriate moves and +;; considers all overlap situations. + +;; HImode does not need scratch. Use attribute for this constraint. 
+
+(define_mode_attr rotx [(SI "&r,&r,X") (PSI "&r,&r,X") (HI "X,X,X")])
+(define_mode_attr rotsmode [(SI "HI") (PSI "QI") (HI "QI")])
+
+;; "rotlhi3"
+;; "rotlpsi3"
+;; "rotlsi3"
+(define_expand "rotl<mode>3"
+  [(parallel [(set (match_operand:HISI 0 "register_operand" "")
+                   (rotate:HISI (match_operand:HISI 1 "register_operand" "")
+                                (match_operand:VOID 2 "const_int_operand" "")))
+              (clobber (match_dup 3))])]
+  ""
+  {
+    int offset;
+
+    if (!CONST_INT_P (operands[2]))
+      FAIL;
+
+    offset = INTVAL (operands[2]);
+
+    if (0 == offset % 8)
+      {
+        if (AVR_HAVE_MOVW && 0 == offset % 16)
+          operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+        else
+          operands[3] = gen_rtx_SCRATCH (QImode);
+      }
+    else if (offset == 1
+             || offset == GET_MODE_BITSIZE (<MODE>mode) - 1)
+      {
+        /* Support rotate left/right by 1.  */
+
+        emit_move_insn (operands[0],
+                        gen_rtx_ROTATE (<MODE>mode, operands[1], operands[2]));
+        DONE;
+      }
+    else
+      FAIL;
+  })
+
+(define_insn "*rotlhi2.1"
+  [(set (match_operand:HI 0 "register_operand"            "=r")
+        (rotate:HI (match_operand:HI 1 "register_operand"  "0")
+                   (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlhi2.15"
+  [(set (match_operand:HI 0 "register_operand"            "=r")
+        (rotate:HI (match_operand:HI 1 "register_operand"  "0")
+                   (const_int 15)))]
+  ""
+  "bst %A0,0\;ror %B0\;ror %A0\;bld %B0,7"
+  [(set_attr "length" "4")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlpsi2.1"
+  [(set (match_operand:PSI 0 "register_operand"             "=r")
+        (rotate:PSI (match_operand:PSI 1 "register_operand"  "0")
+                    (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;rol %C0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "4")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlpsi2.23"
+  [(set (match_operand:PSI 0 "register_operand"             "=r")
+        (rotate:PSI (match_operand:PSI 1 "register_operand"  "0")
+                    (const_int 23)))]
+  ""
+  "bst %A0,0\;ror %C0\;ror %B0\;ror %A0\;bld %C0,7"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlsi2.1"
+  [(set (match_operand:SI 0 "register_operand"            "=r")
+        (rotate:SI (match_operand:SI 1 "register_operand"  "0")
+                   (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;rol %C0\;rol %D0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlsi2.31"
+  [(set (match_operand:SI 0 "register_operand"            "=r")
+        (rotate:SI (match_operand:SI 1 "register_operand"  "0")
+                   (const_int 31)))]
+  ""
+  "bst %A0,0\;ror %D0\;ror %C0\;ror %B0\;ror %A0\;bld %D0,7"
+  [(set_attr "length" "6")
+   (set_attr "cc" "clobber")])
+
+;; Overlapping non-HImode registers often (but not always) need a scratch.
+;; The best we can do is use early clobber alternative "#&r" so that
+;; completely non-overlapping operands don't get a scratch but # so register
+;; allocation does not prefer non-overlapping.
+
+
+;; Split word aligned rotates using scratch that is mode dependent.
+
+;; "*rotwhi"
+;; "*rotwsi"
+(define_insn_and_split "*rotw<mode>"
+  [(set (match_operand:HISI 0 "register_operand"              "=r,r,#&r")
+        (rotate:HISI (match_operand:HISI 1 "register_operand"  "0,r,r")
+                     (match_operand 2 "const_int_operand"      "n,n,n")))
+   (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))]
+  "AVR_HAVE_MOVW
+   && CONST_INT_P (operands[2])
+   && GET_MODE_SIZE (<MODE>mode) % 2 == 0
+   && 0 == INTVAL (operands[2]) % 16"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    avr_rotate_bytes (operands);
+    DONE;
+  })
+
+
+;; Split byte aligned rotates using scratch that is always QI mode.
+ +;; "*rotbhi" +;; "*rotbpsi" +;; "*rotbsi" +(define_insn_and_split "*rotb" + [(set (match_operand:HISI 0 "register_operand" "=r,r,#&r") + (rotate:HISI (match_operand:HISI 1 "register_operand" "0,r,r") + (match_operand 2 "const_int_operand" "n,n,n"))) + (clobber (match_scratch:QI 3 "="))] + "CONST_INT_P (operands[2]) + && (8 == INTVAL (operands[2]) % 16 + || ((!AVR_HAVE_MOVW + || GET_MODE_SIZE (mode) % 2 != 0) + && 0 == INTVAL (operands[2]) % 16))" + "#" + "&& reload_completed" + [(const_int 0)] + { + avr_rotate_bytes (operands); + DONE; + }) + + +;;<< << << << << << << << << << << << << << << << << << << << << << << << << << +;; arithmetic shift left + +;; "ashlqi3" +;; "ashlqq3" "ashluqq3" +(define_expand "ashl3" + [(set (match_operand:ALL1 0 "register_operand" "") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "") + (match_operand:QI 2 "nop_general_operand" "")))]) + +(define_split ; ashlqi3_const4 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 1) + (rotate:QI (match_dup 1) + (const_int 4))) + (set (match_dup 1) + (and:QI (match_dup 1) + (const_int -16)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; ashlqi3_const5 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (ashift:QI (match_dup 1) (const_int 1))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int -32)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; ashlqi3_const6 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (ashift:QI (match_dup 1) (const_int 2))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int -64)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +;; "*ashlqi3" +;; "*ashlqq3" "*ashluqq3" +(define_insn "*ashl3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,!d,r,r") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,n ,n,Qm")))] + "" + { + return ashlqi3_out (insn, operands, NULL); + } + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "adjust_len" "ashlqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +(define_insn "ashl3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashlhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "adjust_len" "ashlhi") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + + +;; Insns like the following are generated when (implicitly) extending 8-bit shifts +;; like char1 = char2 << char3. Only the low-byte is needed in that situation. + +;; "*ashluqihiqi3" +;; "*ashlsqihiqi3" +(define_insn_and_split "*ashlqihiqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "r")) + 0))] + "" + "#" + "" + [(set (match_dup 0) + (ashift:QI (match_dup 1) + (match_dup 2)))]) + +;; ??? 
Combiner does not recognize that it could split the following insn; +;; presumably because he has no register handy? + +;; "*ashluqihiqi3.mem" +;; "*ashlsqihiqi3.mem" +(define_insn_and_split "*ashlqihiqi3.mem" + [(set (match_operand:QI 0 "memory_operand" "=m") + (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:QI 2 "register_operand" "r")) + 0))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (ashift:QI (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (match_dup 3))] + { + operands[3] = gen_reg_rtx (QImode); + }) + +;; Similar. + +(define_insn_and_split "*ashlhiqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r") + (subreg:QI (ashift:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "r")) 0))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 4) + (ashift:QI (match_dup 3) + (match_dup 2))) + (set (match_dup 0) + (match_dup 4))] + { + operands[3] = simplify_gen_subreg (QImode, operands[1], HImode, 0); + operands[4] = gen_reg_rtx (QImode); + }) + +;; High part of 16-bit shift is unused after the instruction: +;; No need to compute it, map to 8-bit shift. + +(define_peephole2 + [(set (match_operand:HI 0 "register_operand" "") + (ashift:HI (match_dup 0) + (match_operand:QI 1 "register_operand" "")))] + "" + [(set (match_dup 2) + (ashift:QI (match_dup 2) + (match_dup 1))) + (clobber (match_dup 3))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + + if (!peep2_reg_dead_p (1, operands[3])) + FAIL; + + operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + }) + + +;; "ashlsi3" +;; "ashlsq3" "ashlusq3" +;; "ashlsa3" "ashlusa3" +(define_insn "ashl3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashlsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "adjust_len" "ashlsi") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. 
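The peepholes below restate, with a scratch d-register available, the constant-shift trick already used by the ashlqi3_const4/5/6 splits earlier: a shift by four is a nibble SWAP followed by an AND that clears the nibble shifted out, and ANDI needs an upper ("d") register for its immediate. A hedged C sketch of the arithmetic being relied on:

    #include <stdint.h>

    /* x << 4 on an 8-bit value: swap the nibbles, then mask with 0xF0 (-16).  */
    static uint8_t shl4_via_swap (uint8_t x)
    {
      uint8_t swapped = (uint8_t) ((x << 4) | (x >> 4));   /* SWAP */
      return (uint8_t) (swapped & 0xF0);                   /* ANDI */
    }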
+ +(define_peephole2 ; ashlqi3_l_const4 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 1) (const_int -16)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; ashlqi3_l_const5 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (ashift:QI (match_dup 2) (const_int 1))) + (set (match_dup 1) (const_int -32)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; ashlqi3_l_const6 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (ashift:QI (match_dup 2) (const_int 2))) + (set (match_dup 1) (const_int -64)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashift:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashlhi3_const" +;; "*ashlhq3_const" "*ashluhq3_const" +;; "*ashlha3_const" "*ashluha3_const" +(define_insn "*ashl3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return ashlhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,2,4,10") + (set_attr "adjust_len" "ashlhi") + (set_attr "cc" "none,set_n,clobber,set_n,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashift:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashlsi3_const" +;; "*ashlsq3_const" "*ashlusq3_const" +;; "*ashlsa3_const" "*ashlusa3_const" +(define_insn "*ashl3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return ashlsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "ashlsi") + (set_attr "cc" "none,set_n,clobber,clobber")]) + +(define_expand "ashlpsi3" + [(parallel [(set (match_operand:PSI 0 "register_operand" "") + (ashift:PSI (match_operand:PSI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (scratch:QI))])] + "" + { + if (AVR_HAVE_MUL + && CONST_INT_P (operands[2])) + { + if (IN_RANGE (INTVAL (operands[2]), 3, 6)) + { + rtx xoffset = force_reg (QImode, gen_int_mode (1 << INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], xoffset, 
operands[1])); + DONE; + } + else if (optimize_insn_for_speed_p () + && INTVAL (operands[2]) != 16 + && IN_RANGE (INTVAL (operands[2]), 9, 22)) + { + rtx xoffset = force_reg (PSImode, gen_int_mode (1 << INTVAL (operands[2]), PSImode)); + emit_insn (gen_mulpsi3 (operands[0], operands[1], xoffset)); + DONE; + } + } + }) + +(define_insn "*ashlpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r") + (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "" + { + return avr_out_ashlpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "ashlpsi") + (set_attr "cc" "clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; arithmetic shift right + +;; "ashrqi3" +;; "ashrqq3" "ashruqq3" +(define_insn "ashr3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,r ,r") + (ashiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0 ,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C03 C04 C05,C06 C07,Qm")))] + "" + { + return ashrqi3_out (insn, operands, NULL); + } + [(set_attr "length" "5,0,1,2,5,4,9") + (set_attr "adjust_len" "ashrqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,clobber,clobber")]) + +;; "ashrhi3" +;; "ashrhq3" "ashruhq3" +;; "ashrha3" "ashruha3" +(define_insn "ashr3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,4,4,10,10") + (set_attr "adjust_len" "ashrhi") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +(define_insn "ashrpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r,r") + (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,0,r,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,K,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "" + { + return avr_out_ashrpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "ashrpsi") + (set_attr "cc" "clobber")]) + +;; "ashrsi3" +;; "ashrsq3" "ashrusq3" +;; "ashrsa3" "ashrusa3" +(define_insn "ashr3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,6,8,10,12") + (set_attr "adjust_len" "ashrsi") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. 
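One detail worth spelling out from the ashlpsi3 expander above: with a hardware multiplier, a 24-bit left shift by a small constant is rewritten as a widening multiply by a power of two (via gen_mulsqipsi3 or gen_mulpsi3) rather than a chain of single-bit shifts. A minimal C sketch of the identity being used; int32_t stands in for PSImode since standard C has no 24-bit type, and the low 24 bits of both forms coincide, so the multiply produces the same PSImode result.

    #include <stdint.h>

    /* x << n  has the same low bits as  x * (1 << n),  for 0 <= n < 23.  */
    static int32_t shl_as_mul (int32_t x, unsigned n)
    {
      return x * (int32_t) (1u << n);
    }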
+ +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashiftrt:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashrhi3_const" +;; "*ashrhq3_const" "*ashruhq3_const" +;; "*ashrha3_const" "*ashruha3_const" +(define_insn "*ashr3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return ashrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,4,4,10") + (set_attr "adjust_len" "ashrhi") + (set_attr "cc" "none,clobber,set_n,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashiftrt:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashrsi3_const" +;; "*ashrsq3_const" "*ashrusq3_const" +;; "*ashrsa3_const" "*ashrusa3_const" +(define_insn "*ashr3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return ashrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "ashrsi") + (set_attr "cc" "none,clobber,set_n,clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; logical shift right + +;; "lshrqi3" +;; "lshrqq3 "lshruqq3" +(define_expand "lshr3" + [(set (match_operand:ALL1 0 "register_operand" "") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "") + (match_operand:QI 2 "nop_general_operand" "")))]) + +(define_split ; lshrqi3_const4 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 1) + (rotate:QI (match_dup 1) + (const_int 4))) + (set (match_dup 1) + (and:QI (match_dup 1) + (const_int 15)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; lshrqi3_const5 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (lshiftrt:QI (match_dup 1) (const_int 1))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int 7)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; lshrqi3_const6 + [(set (match_operand:QI 0 "d_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (lshiftrt:QI (match_dup 1) (const_int 2))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int 3)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +;; "*lshrqi3" +;; "*lshrqq3" +;; "*lshruqq3" +(define_insn "*lshr3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,!d,r,r") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,n ,n,Qm")))] + "" + { + return lshrqi3_out (insn, operands, NULL); 
+ } + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "adjust_len" "lshrqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +;; "lshrhi3" +;; "lshrhq3" "lshruhq3" +;; "lshrha3" "lshruha3" +(define_insn "lshr3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return lshrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "adjust_len" "lshrhi") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +(define_insn "lshrpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r,r") + (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "" + { + return avr_out_lshrpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "lshrpsi") + (set_attr "cc" "clobber")]) + +;; "lshrsi3" +;; "lshrsq3" "lshrusq3" +;; "lshrsa3" "lshrusa3" +(define_insn "lshr3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return lshrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "adjust_len" "lshrsi") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. + +(define_peephole2 ; lshrqi3_l_const4 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 1) (const_int 15)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; lshrqi3_l_const5 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (lshiftrt:QI (match_dup 2) (const_int 1))) + (set (match_dup 1) (const_int 7)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; lshrqi3_l_const6 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (lshiftrt:QI (match_dup 2) (const_int 2))) + (set (match_dup 1) (const_int 3)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (lshiftrt:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*lshrhi3_const" +;; "*lshrhq3_const" "*lshruhq3_const" +;; "*lshrha3_const" "*lshruha3_const" +(define_insn "*lshr3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 
"const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return lshrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,2,4,10") + (set_attr "adjust_len" "lshrhi") + (set_attr "cc" "none,clobber,clobber,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (lshiftrt:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*lshrsi3_const" +;; "*lshrsq3_const" "*lshrusq3_const" +;; "*lshrsa3_const" "*lshrusa3_const" +(define_insn "*lshr3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return lshrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "lshrsi") + (set_attr "cc" "none,clobber,clobber,clobber")]) + +;; abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) +;; abs + +(define_insn "absqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (abs:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "sbrc %0,7 + neg %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (abs:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + andi %D0,0x7f + clt\;bld %D0,7" + [(set_attr "length" "1,2") + (set_attr "cc" "set_n,clobber")]) + +;; 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x +;; neg + +(define_insn "negqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (neg:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "neg %0" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "*negqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (neg:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0"))))] + "" + "clr %B0\;neg %A0\;brge .+2\;com %B0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=r,&r") + (neg:HI (match_operand:HI 1 "register_operand" "0,r")))] + "" + "@ + neg %B0\;neg %A0\;sbc %B0,__zero_reg__ + clr %A0\;clr %B0\;sub %A0,%A1\;sbc %B0,%B1" + [(set_attr "length" "3,4") + (set_attr "cc" "set_czn")]) + +(define_insn "negpsi2" + [(set (match_operand:PSI 0 "register_operand" "=!d,r,&r") + (neg:PSI (match_operand:PSI 1 "register_operand" "0,0,r")))] + "" + "@ + com %C0\;com %B0\;neg %A0\;sbci %B0,-1\;sbci %C0,-1 + com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__ + clr %A0\;clr %B0\;clr %C0\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1" + [(set_attr "length" "5,6,6") + (set_attr "cc" "set_czn,set_n,set_czn")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=!d,r,&r,&r") + (neg:SI (match_operand:SI 1 "register_operand" "0,0,r ,r")))] + "" + "@ + com %D0\;com %C0\;com %B0\;neg %A0\;sbci %B0,lo8(-1)\;sbci %C0,lo8(-1)\;sbci %D0,lo8(-1) + com %D0\;com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ + clr %A0\;clr %B0\;clr %C0\;clr %D0\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1\;sbc %D0,%D1 + clr %A0\;clr %B0\;movw %C0,%A0\;sub %A0,%A1\;sbc %B0,%B1\;sbc 
%C0,%C1\;sbc %D0,%D1" + [(set_attr "length" "7,8,8,7") + (set_attr "isa" "*,*,mov,movw") + (set_attr "cc" "set_czn,set_n,set_czn,set_czn")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (neg:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + subi %D0,0x80 + bst %D0,7\;com %D0\;bld %D0,7\;com %D0" + [(set_attr "length" "1,4") + (set_attr "cc" "set_n,set_n")]) + +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +;; not + +(define_insn "one_cmplqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (not:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "com %0" + [(set_attr "length" "1") + (set_attr "cc" "set_czn")]) + +(define_insn "one_cmplhi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (not:HI (match_operand:HI 1 "register_operand" "0")))] + "" + "com %0 + com %B0" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "one_cmplpsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (not:PSI (match_operand:PSI 1 "register_operand" "0")))] + "" + "com %0\;com %B0\;com %C0" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "com %0 + com %B0 + com %C0 + com %D0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; sign extend + +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't be combined to a widening +;; multiplication. There is no need for combine to propagate hard registers, +;; register allocation can do it just as well. 
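The extendqihi2 alternatives that follow implement ordinary two's-complement sign extension: clear the high byte, then complement it to 0xFF only when bit 7 of the low byte is set (clr / sbrc / com). A small C restatement, purely for illustration:

    #include <stdint.h>

    static int16_t extend_qi_to_hi (int8_t x)
    {
      uint8_t lo = (uint8_t) x;
      uint8_t hi = (lo & 0x80) ? 0xFF : 0x00;   /* clr %B0; sbrc %A0,7; com %B0 */
      return (int16_t) (((uint16_t) hi << 8) | lo);
    }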
+ +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %0,7\;com %B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0" + [(set_attr "length" "3,4") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendqipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r") + (sign_extend:PSI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0" + [(set_attr "length" "4,5") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0" + [(set_attr "length" "5,6") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r") + (sign_extend:PSI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r,*r")))] + "" + "@ + clr %C0\;sbrc %B0,7\;com %C0 + mov %A0,%A1\;mov %B0,%B1\;clr %C0\;sbrc %B0,7\;com %C0 + movw %A0,%A1\;clr %C0\;sbrc %B0,7\;com %C0" + [(set_attr "length" "3,5,4") + (set_attr "isa" "*,mov,movw") + (set_attr "cc" "set_n")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r ,r") + (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r,*r")))] + "" + "@ + clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 + mov %A0,%A1\;mov %B0,%B1\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 + movw %A0,%A1\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0" + [(set_attr "length" "4,6,5") + (set_attr "isa" "*,mov,movw") + (set_attr "cc" "set_n")]) + +(define_insn "extendpsisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:PSI 1 "combine_pseudo_register_operand" "0")))] + "" + "clr %D0\;sbrc %C0,7\;com %D0" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; zero extend + +(define_insn_and_split "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (QImode, HImode); + unsigned int high_off = subreg_highpart_offset (QImode, HImode); + + operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, low_off); + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, high_off); + }) + +(define_insn_and_split "zero_extendqipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (zero_extend:PSI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0)) + (set (match_dup 4) (const_int 0))] + { + operands[2] = simplify_gen_subreg (QImode, operands[0], PSImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], PSImode, 1); + operands[4] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + }) + +(define_insn_and_split "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + 
"#" + "reload_completed" + [(set (match_dup 2) (zero_extend:HI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); + }) + +(define_insn_and_split "zero_extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (zero_extend:PSI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + operands[2] = simplify_gen_subreg (HImode, operands[0], PSImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + }) + +(define_insn_and_split "n_extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r,d,r") + (lo_sum:PSI (match_operand:QI 1 "const_int_operand" "L,P,n,n") + (match_operand:HI 2 "register_operand" "r,r,r,r"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "" + "#" + "reload_completed" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 3) (match_dup 6)) + ; no-op move in the case where no scratch is needed + (set (match_dup 5) (match_dup 3))] + { + operands[4] = simplify_gen_subreg (HImode, operands[0], PSImode, 0); + operands[5] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + operands[6] = operands[1]; + + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = operands[5]; + }) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); + }) + +(define_insn_and_split "zero_extendpsisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:PSI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + operands[2] = simplify_gen_subreg (PSImode, operands[0], SImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], SImode, 3); + }) + +(define_insn_and_split "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +(define_insn_and_split "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = 
subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +;;<=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=> +;; compare + +; Optimize negated tests into reverse compare if overflow is undefined. +(define_insn "*negated_tstqi" + [(set (cc0) + (compare (neg:QI (match_operand:QI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%0" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +(define_insn "*reversed_tstqi" + [(set (cc0) + (compare (const_int 0) + (match_operand:QI 0 "register_operand" "r")))] + "" + "cp __zero_reg__,%0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tsthi" + [(set (cc0) + (compare (neg:HI (match_operand:HI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +;; Leave here the clobber used by the cmphi pattern for simplicity, even +;; though it is unused, because this pattern is synthesized by avr_reorg. 
+(define_insn "*reversed_tsthi" + [(set (cc0) + (compare (const_int 0) + (match_operand:HI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tstpsi" + [(set (cc0) + (compare (neg:PSI (match_operand:PSI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0" + [(set_attr "cc" "compare") + (set_attr "length" "3")]) + +(define_insn "*reversed_tstpsi" + [(set (cc0) + (compare (const_int 0) + (match_operand:PSI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0" + [(set_attr "cc" "compare") + (set_attr "length" "3")]) + +(define_insn "*negated_tstsi" + [(set (cc0) + (compare (neg:SI (match_operand:SI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0 + cpc __zero_reg__,%C0 + cpc __zero_reg__,%D0" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + +;; "*reversed_tstsi" +;; "*reversed_tstsq" "*reversed_tstusq" +;; "*reversed_tstsa" "*reversed_tstusa" +(define_insn "*reversed_tst" + [(set (cc0) + (compare (match_operand:ALL4 0 "const0_operand" "Y00") + (match_operand:ALL4 1 "register_operand" "r"))) + (clobber (match_scratch:QI 2 "=X"))] + "" + "cp __zero_reg__,%A1 + cpc __zero_reg__,%B1 + cpc __zero_reg__,%C1 + cpc __zero_reg__,%D1" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + + +;; "*cmpqi" +;; "*cmpqq" "*cmpuqq" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL1 0 "register_operand" "r ,r,d") + (match_operand:ALL1 1 "nonmemory_operand" "Y00,r,i")))] + "" + "@ + tst %0 + cp %0,%1 + cpi %0,lo8(%1)" + [(set_attr "cc" "compare,compare,compare") + (set_attr "length" "1,1,1")]) + +(define_insn "*cmpqi_sign_extend" + [(set (cc0) + (compare (sign_extend:HI (match_operand:QI 0 "register_operand" "d")) + (match_operand:HI 1 "s8_operand" "n")))] + "" + "cpi %0,lo8(%1)" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +;; "*cmphi" +;; "*cmphq" "*cmpuhq" +;; "*cmpha" "*cmpuha" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL2 0 "register_operand" "!w ,r ,r,d ,r ,d,r") + (match_operand:ALL2 1 "nonmemory_operand" "Y00,Y00,r,s ,s ,M,n Ynn"))) + (clobber (match_scratch:QI 2 "=X ,X ,X,&d,&d ,X,&d"))] + "" + { + switch (which_alternative) + { + case 0: + case 1: + return avr_out_tsthi (insn, operands, NULL); + + case 2: + return "cp %A0,%A1\;cpc %B0,%B1"; + + case 3: + if (mode != HImode) + break; + return reg_unused_after (insn, operands[0]) + ? 
"subi %A0,lo8(%1)\;sbci %B0,hi8(%1)" + : "ldi %2,hi8(%1)\;cpi %A0,lo8(%1)\;cpc %B0,%2"; + + case 4: + if (mode != HImode) + break; + return "ldi %2,lo8(%1)\;cp %A0,%2\;ldi %2,hi8(%1)\;cpc %B0,%2"; + } + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "1,2,2,3,4,2,4") + (set_attr "adjust_len" "tsthi,tsthi,*,*,*,compare,compare")]) + +(define_insn "*cmppsi" + [(set (cc0) + (compare (match_operand:PSI 0 "register_operand" "r,r,d ,r ,d,r") + (match_operand:PSI 1 "nonmemory_operand" "L,r,s ,s ,M,n"))) + (clobber (match_scratch:QI 2 "=X,X,&d,&d ,X,&d"))] + "" + { + switch (which_alternative) + { + case 0: + return avr_out_tstpsi (insn, operands, NULL); + + case 1: + return "cp %A0,%A1\;cpc %B0,%B1\;cpc %C0,%C1"; + + case 2: + return reg_unused_after (insn, operands[0]) + ? "subi %A0,lo8(%1)\;sbci %B0,hi8(%1)\;sbci %C0,hh8(%1)" + : "cpi %A0,lo8(%1)\;ldi %2,hi8(%1)\;cpc %B0,%2\;ldi %2,hh8(%1)\;cpc %C0,%2"; + + case 3: + return "ldi %2,lo8(%1)\;cp %A0,%2\;ldi %2,hi8(%1)\;cpc %B0,%2\;ldi %2,hh8(%1)\;cpc %C0,%2"; + } + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "3,3,5,6,3,7") + (set_attr "adjust_len" "tstpsi,*,*,*,compare,compare")]) + +;; "*cmpsi" +;; "*cmpsq" "*cmpusq" +;; "*cmpsa" "*cmpusa" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL4 0 "register_operand" "r ,r ,d,r ,r") + (match_operand:ALL4 1 "nonmemory_operand" "Y00,r ,M,M ,n Ynn"))) + (clobber (match_scratch:QI 2 "=X ,X ,X,&d,&d"))] + "" + { + if (0 == which_alternative) + return avr_out_tstsi (insn, operands, NULL); + else if (1 == which_alternative) + return "cp %A0,%A1\;cpc %B0,%B1\;cpc %C0,%C1\;cpc %D0,%D1"; + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "4,4,4,5,8") + (set_attr "adjust_len" "tstsi,*,compare,compare,compare")]) + + +;; ---------------------------------------------------------------------- +;; JUMP INSTRUCTIONS +;; ---------------------------------------------------------------------- +;; Conditional jump instructions + +;; "cbranchqi4" +;; "cbranchqq4" "cbranchuqq4" +(define_expand "cbranch4" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "") + (match_operand:ALL1 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))]) + +;; "cbranchhi4" "cbranchhq4" "cbranchuhq4" "cbranchha4" "cbranchuha4" +;; "cbranchsi4" "cbranchsq4" "cbranchusq4" "cbranchsa4" "cbranchusa4" +;; "cbranchpsi4" +(define_expand "cbranch4" + [(parallel [(set (cc0) + (compare (match_operand:ORDERED234 1 "register_operand" "") + (match_operand:ORDERED234 2 "nonmemory_operand" ""))) + (clobber (match_scratch:QI 4 ""))]) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))]) + + +;; Test a single bit in a QI/HI/SImode register. +;; Combine will create zero extract patterns for single bit tests. +;; permit any mode in source pattern by using VOIDmode. 
+ +(define_insn "*sbrx_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIDI + (match_operand:VOID 1 "register_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Same test based on bitwise AND. Keep this in case gcc changes patterns. +;; or for old peepholes. +;; Fixme - bitwise Mask will not work for DImode + +(define_insn "*sbrx_and_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(and:QISI + (match_operand:QISI 1 "register_operand" "r") + (match_operand:QISI 2 "single_one_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + HOST_WIDE_INT bitnumber; + bitnumber = exact_log2 (GET_MODE_MASK (mode) & INTVAL (operands[2])); + operands[2] = GEN_INT (bitnumber); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Convert sign tests to bit 7/15/31 tests that match the above insns. +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = gen_int_mode (-2147483647 - 1, SImode);") + 
+(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = gen_int_mode (-2147483647 - 1, SImode);") + +;; ************************************************************************ +;; Implementation of conditional jumps here. +;; Compare with 0 (test) jumps +;; ************************************************************************ + +(define_insn "branch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + + +;; Same as above but wrap SET_SRC so that this branch won't be transformed +;; or optimized in the remainder. + +(define_insn "branch_unspec" + [(set (pc) + (unspec [(if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)) + ] UNSPEC_IDENTITY))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch") + (set_attr "cc" "none")]) + +;; **************************************************************** +;; AVR does not have following conditional jumps: LE,LEU,GT,GTU. +;; Convert them all to proper jumps. +;; ****************************************************************/ + +(define_insn "difficult_branch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +;; revers branch + +(define_insn "rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1); + } + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +(define_insn "difficult_rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1); + } + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + +;; ************************************************************************** +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + { + return AVR_HAVE_JMP_CALL && get_attr_length (insn) != 1 + ? "jmp %x0" + : "rjmp %x0"; + } + [(set (attr "length") + (if_then_else (match_operand 0 "symbol_ref_operand" "") + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 1) + (const_int 2)) + (if_then_else (and (ge (minus (pc) (match_dup 0)) (const_int -2047)) + (le (minus (pc) (match_dup 0)) (const_int 2047))) + (const_int 1) + (const_int 2)))) + (set_attr "cc" "none")]) + +;; call + +;; Operand 1 not used on the AVR. +;; Operand 2 is 1 for tail-call, 0 otherwise. 
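+;; Rough illustration (f and g are placeholders, not part of this port):
+;; with optimization enabled,
+;;
+;;     extern int g (int);
+;;     int f (int x) { return g (x + 1); }
+;;
+;; can be expanded through "sibcall_value" below -- the (use (const_int 1))
+;; marker -- and end up as a plain RJMP/JMP to g, while a call whose result
+;; is still needed afterwards goes through "call_value" -- (use (const_int 0))
+;; -- and becomes an RCALL/CALL with the usual return sequence.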
+(define_expand "call" + [(parallel[(call (match_operand:HI 0 "call_insn_operand" "") + (match_operand:HI 1 "general_operand" "")) + (use (const_int 0))])]) + +;; Operand 1 not used on the AVR. +;; Operand 2 is 1 for tail-call, 0 otherwise. +(define_expand "sibcall" + [(parallel[(call (match_operand:HI 0 "call_insn_operand" "") + (match_operand:HI 1 "general_operand" "")) + (use (const_int 1))])]) + +;; call value + +;; Operand 2 not used on the AVR. +;; Operand 3 is 1 for tail-call, 0 otherwise. +(define_expand "call_value" + [(parallel[(set (match_operand 0 "register_operand" "") + (call (match_operand:HI 1 "call_insn_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (use (const_int 0))])]) + +;; Operand 2 not used on the AVR. +;; Operand 3 is 1 for tail-call, 0 otherwise. +(define_expand "sibcall_value" + [(parallel[(set (match_operand 0 "register_operand" "") + (call (match_operand:HI 1 "call_insn_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (use (const_int 1))])]) + +(define_insn "call_insn" + [(parallel[(call (mem:HI (match_operand:HI 0 "nonmemory_operand" "z,s,z,s")) + (match_operand:HI 1 "general_operand" "X,X,X,X")) + (use (match_operand:HI 2 "const_int_operand" "L,L,P,P"))])] + ;; Operand 1 not used on the AVR. + ;; Operand 2 is 1 for tail-call, 0 otherwise. + "" + "@ + %!icall + %~call %x0 + %!ijmp + %~jmp %x0" + [(set_attr "cc" "clobber") + (set_attr "length" "1,*,1,*") + (set_attr "adjust_len" "*,call,*,call")]) + +(define_insn "call_value_insn" + [(parallel[(set (match_operand 0 "register_operand" "=r,r,r,r") + (call (mem:HI (match_operand:HI 1 "nonmemory_operand" "z,s,z,s")) + (match_operand:HI 2 "general_operand" "X,X,X,X"))) + (use (match_operand:HI 3 "const_int_operand" "L,L,P,P"))])] + ;; Operand 2 not used on the AVR. + ;; Operand 3 is 1 for tail-call, 0 otherwise. + "" + "@ + %!icall + %~call %x1 + %!ijmp + %~jmp %x1" + [(set_attr "cc" "clobber") + (set_attr "length" "1,*,1,*") + (set_attr "adjust_len" "*,call,*,call")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +; indirect jump + +(define_expand "indirect_jump" + [(set (pc) + (match_operand:HI 0 "nonmemory_operand" ""))] + "" + { + if (!AVR_HAVE_JMP_CALL && !register_operand (operands[0], HImode)) + { + operands[0] = copy_to_mode_reg (HImode, operands[0]); + } + }) + +; indirect jump +(define_insn "*indirect_jump" + [(set (pc) + (match_operand:HI 0 "nonmemory_operand" "i,i,!z,*r,z"))] + "" + "@ + rjmp %x0 + jmp %x0 + ijmp + push %A0\;push %B0\;ret + eijmp" + [(set_attr "length" "1,2,1,3,1") + (set_attr "isa" "rjmp,jmp,ijmp,ijmp,eijmp") + (set_attr "cc" "none")]) + +;; table jump +;; For entries in jump table see avr_output_addr_vec_elt. 
+ +;; Table made from +;; "rjmp .L" instructions for <= 8K devices +;; ".word gs(.L)" addresses for > 8K devices +(define_insn "*tablejump" + [(set (pc) + (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "" + "@ + ijmp + push %A0\;push %B0\;ret + jmp __tablejump2__" + [(set_attr "length" "1,3,2") + (set_attr "isa" "rjmp,rjmp,jmp") + (set_attr "cc" "none,none,clobber")]) + + +(define_expand "casesi" + [(parallel [(set (match_dup 6) + (minus:HI (subreg:HI (match_operand:SI 0 "register_operand" "") 0) + (match_operand:HI 1 "register_operand" ""))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 6) + (match_operand:HI 2 "register_operand" ""))) + (clobber (match_scratch:QI 9 ""))]) + + (set (pc) + (if_then_else (gtu (cc0) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + + (set (match_dup 6) + (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + + (parallel [(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP)) + (use (label_ref (match_dup 3))) + (clobber (match_dup 6))])] + "" + { + operands[6] = gen_reg_rtx (HImode); + }) + + +;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; This instruction sets Z flag + +(define_insn "sez" + [(set (cc0) (const_int 0))] + "" + "sez" + [(set_attr "length" "1") + (set_attr "cc" "compare")]) + +;; Clear/set/test a single bit in I/O address space. + +(define_insn "*cbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (and:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_zero_operand" "n")))] + "" + { + operands[2] = GEN_INT (exact_log2 (~INTVAL (operands[1]) & 0xff)); + return "cbi %i0,%2"; + } + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +(define_insn "*sbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (ior:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_one_operand" "n")))] + "" + { + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1]) & 0xff)); + return "sbi %i0,%2"; + } + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Lower half of the I/O space - use sbic/sbis directly. +(define_insn "*sbix_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIHI + (mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Tests of bit 7 are pessimized to sign tests, so we need this too... 
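+;; Illustrative only (PINB stands for any low I/O register of a concrete
+;; device): a bit-7 test such as
+;;
+;;     if (PINB & 0x80)
+;;         ...
+;;
+;; is canonicalized into a sign test (GE/LT against zero) before it reaches
+;; the back end, so it does not match the eqne zero_extract form above; the
+;; gelt variant below catches it and still emits a single SBIC/SBIS on bit 7.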
+(define_insn "*sbix_branch_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Upper half of the I/O space - read port to __tmp_reg__ and use sbrc/sbrs. +(define_insn "*sbix_branch_tmp" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIHI + (mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2045))) + (const_int 3) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +(define_insn "*sbix_branch_tmp_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2045))) + (const_int 3) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +;; ************************* Peepholes ******************************** + +(define_peephole ; "*dec-and-branchsi!=-1.d.clobber" + [(parallel [(set (match_operand:SI 0 "d_register_operand" "") + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1" CR_TAB + "sbc %C0,__zero_reg__" CR_TAB + "sbc %D0,__zero_reg__", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__" CR_TAB + "sbc %C0,__zero_reg__" CR_TAB + "sbc %D0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? 
"brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +(define_peephole ; "*dec-and-branchhi!=-1" + [(set (match_operand:HI 0 "d_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +;; Same as above but with clobber flavour of addhi3 +(define_peephole ; "*dec-and-branchhi!=-1.d.clobber" + [(parallel [(set (match_operand:HI 0 "d_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +;; Same as above but with clobber flavour of addhi3 +(define_peephole ; "*dec-and-branchhi!=-1.l.clobber" + [(parallel [(set (match_operand:HI 0 "l_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 3 "d_register_operand" ""))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + output_asm_insn ("ldi %3,1" CR_TAB + "sub %A0,%3" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? 
"brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +(define_peephole ; "*dec-and-branchqi!=-1" + [(set (match_operand:QI 0 "d_register_operand" "") + (plus:QI (match_dup 0) + (const_int -1))) + (set (cc0) + (compare (match_dup 0) + (const_int -1))) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + cc_status.value1 = operands[0]; + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + + output_asm_insn ("subi %A0,1", operands); + + jump_mode = avr_jump_mode (operands[1], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[0] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%0 %1"; + case 2: return "%0 .+2\;rjmp %1"; + case 3: return "%0 .+4\;jmp %1"; + } + + gcc_unreachable(); + return ""; + }) + + +(define_peephole ; "*cpse.eq" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "r,r") + (match_operand:ALL1 2 "reg_or_0_operand" "r,Y00"))) + (set (pc) + (if_then_else (eq (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "jump_over_one_insn_p (insn, operands[0])" + "@ + cpse %1,%2 + cpse %1,__zero_reg__") + +;; This peephole avoids code like +;; +;; TST Rn ; *cmpqi +;; BREQ .+2 ; branch +;; RJMP .Lm +;; +;; Notice that the peephole is always shorter than cmpqi + branch. +;; The reason to write it as peephole is that sequences like +;; +;; AND Rm, Rn +;; BRNE .La +;; +;; shall not be superseeded. With a respective combine pattern +;; the latter sequence would be +;; +;; AND Rm, Rn +;; CPSE Rm, __zero_reg__ +;; RJMP .La +;; +;; and thus longer and slower and not easy to be rolled back. + +(define_peephole ; "*cpse.ne" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "") + (match_operand:ALL1 2 "reg_or_0_operand" ""))) + (set (pc) + (if_then_else (ne (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "!AVR_HAVE_JMP_CALL + || !avr_current_device->errata_skip" + { + if (operands[2] == CONST0_RTX (mode)) + operands[2] = zero_reg_rtx; + + return 3 == avr_jump_mode (operands[0], insn) + ? 
"cpse %1,%2\;jmp %0" + : "cpse %1,%2\;rjmp %0"; + }) + +;;pppppppppppppppppppppppppppppppppppppppppppppppppppp +;;prologue/epilogue support instructions + +(define_insn "popqi" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (pre_inc:HI (reg:HI REG_SP))))] + "" + "pop %0" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +;; Enable Interrupts +(define_expand "enable_interrupt" + [(clobber (const_int 0))] + "" + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_cli_sei (const1_rtx, mem)); + DONE; + }) + +;; Disable Interrupts +(define_expand "disable_interrupt" + [(clobber (const_int 0))] + "" + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_cli_sei (const0_rtx, mem)); + DONE; + }) + +(define_insn "cli_sei" + [(unspec_volatile [(match_operand:QI 0 "const_int_operand" "L,P")] + UNSPECV_ENABLE_IRQS) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))] + "" + "@ + cli + sei" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Library prologue saves +(define_insn "call_prologue_saves" + [(unspec_volatile:HI [(const_int 0)] UNSPECV_PROLOGUE_SAVES) + (match_operand:HI 0 "immediate_operand" "i,i") + (set (reg:HI REG_SP) + (minus:HI (reg:HI REG_SP) + (match_operand:HI 1 "immediate_operand" "i,i"))) + (use (reg:HI REG_X)) + (clobber (reg:HI REG_Z))] + "" + "ldi r30,lo8(gs(1f)) + ldi r31,hi8(gs(1f)) + %~jmp __prologue_saves__+((18 - %0) * 2) +1:" + [(set_attr "length" "5,6") + (set_attr "cc" "clobber") + (set_attr "isa" "rjmp,jmp")]) + +; epilogue restores using library +(define_insn "epilogue_restores" + [(unspec_volatile:QI [(const_int 0)] UNSPECV_EPILOGUE_RESTORES) + (set (reg:HI REG_Y) + (plus:HI (reg:HI REG_Y) + (match_operand:HI 0 "immediate_operand" "i,i"))) + (set (reg:HI REG_SP) + (plus:HI (reg:HI REG_Y) + (match_dup 0))) + (clobber (reg:QI REG_Z))] + "" + "ldi r30, lo8(%0) + %~jmp __epilogue_restores__ + ((18 - %0) * 2)" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber") + (set_attr "isa" "rjmp,jmp")]) + +; return +(define_insn "return" + [(return)] + "reload_completed && avr_simple_epilogue ()" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_epilogue" + [(return)] + "reload_completed + && cfun->machine + && !(cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_interrupt_epilogue" + [(return)] + "reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked" + "reti" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_naked_epilogue" + [(return)] + "reload_completed + && cfun->machine + && cfun->machine->is_naked" + "" + [(set_attr "cc" "none") + (set_attr "length" "0")]) + +(define_expand "prologue" + [(const_int 0)] + "" + { + avr_expand_prologue (); + DONE; + }) + +(define_expand "epilogue" + [(const_int 0)] + "" + { + avr_expand_epilogue (false /* sibcall_p */); + DONE; + }) + +(define_expand "sibcall_epilogue" + [(const_int 0)] + "" + { + avr_expand_epilogue (true /* sibcall_p */); + DONE; + }) + +;; Some instructions resp. instruction sequences available +;; via builtins. 
+ +(define_insn "delay_cycles_1" + [(unspec_volatile [(match_operand:QI 0 "const_int_operand" "n") + (const_int 1)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d"))] + "" + "ldi %2,lo8(%0) + 1: dec %2 + brne 1b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_2" + [(unspec_volatile [(match_operand:HI 0 "const_int_operand" "n") + (const_int 2)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:HI 2 "=&w"))] + "" + "ldi %A2,lo8(%0) + ldi %B2,hi8(%0) + 1: sbiw %A2,1 + brne 1b" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_3" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n") + (const_int 3)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d")) + (clobber (match_scratch:QI 3 "=&d")) + (clobber (match_scratch:QI 4 "=&d"))] + "" + "ldi %2,lo8(%0) + ldi %3,hi8(%0) + ldi %4,hlo8(%0) + 1: subi %2,1 + sbci %3,0 + sbci %4,0 + brne 1b" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_4" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n") + (const_int 4)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d")) + (clobber (match_scratch:QI 3 "=&d")) + (clobber (match_scratch:QI 4 "=&d")) + (clobber (match_scratch:QI 5 "=&d"))] + "" + "ldi %2,lo8(%0) + ldi %3,hi8(%0) + ldi %4,hlo8(%0) + ldi %5,hhi8(%0) + 1: subi %2,1 + sbci %3,0 + sbci %4,0 + sbci %5,0 + brne 1b" + [(set_attr "length" "9") + (set_attr "cc" "clobber")]) + + +;; __builtin_avr_insert_bits + +(define_insn "insert_bits" + [(set (match_operand:QI 0 "register_operand" "=r ,d ,r") + (unspec:QI [(match_operand:SI 1 "const_int_operand" "C0f,Cxf,C0f") + (match_operand:QI 2 "register_operand" "r ,r ,r") + (match_operand:QI 3 "nonmemory_operand" "n ,0 ,0")] + UNSPEC_INSERT_BITS))] + "" + { + return avr_out_insert_bits (operands, NULL); + } + [(set_attr "adjust_len" "insert_bits") + (set_attr "cc" "clobber")]) + + +;; __builtin_avr_flash_segment + +;; Just a helper for the next "official" expander. + +(define_expand "flash_segment1" + [(set (match_operand:QI 0 "register_operand" "") + (subreg:QI (match_operand:PSI 1 "register_operand" "") + 2)) + (set (cc0) + (compare (match_dup 0) + (const_int 0))) + (set (pc) + (if_then_else (ge (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_dup 0) + (const_int -1))]) + +(define_expand "flash_segment" + [(parallel [(match_operand:QI 0 "register_operand" "") + (match_operand:PSI 1 "register_operand" "")])] + "" + { + rtx label = gen_label_rtx (); + emit (gen_flash_segment1 (operands[0], operands[1], label)); + emit_label (label); + DONE; + }) + +;; Actually, it's too late now to work out address spaces known at compiletime. +;; Best place would be to fold ADDR_SPACE_CONVERT_EXPR in avr_fold_builtin. +;; However, avr_addr_space_convert can add some built-in knowledge for PSTR +;; so that ADDR_SPACE_CONVERT_EXPR in the built-in must not be resolved. 
+ +(define_insn_and_split "*split.flash_segment" + [(set (match_operand:QI 0 "register_operand" "=d") + (subreg:QI (lo_sum:PSI (match_operand:QI 1 "nonmemory_operand" "ri") + (match_operand:HI 2 "register_operand" "r")) + 2))] + "" + { gcc_unreachable(); } + "" + [(set (match_dup 0) + (match_dup 1))]) + + +;; Parity + +;; Postpone expansion of 16-bit parity to libgcc call until after combine for +;; better 8-bit parity recognition. + +(define_expand "parityhi2" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (parity:HI (match_operand:HI 1 "register_operand" ""))) + (clobber (reg:HI 24))])]) + +(define_insn_and_split "*parityhi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (parity:HI (match_operand:HI 1 "register_operand" "r"))) + (clobber (reg:HI 24))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 24) + (match_dup 1)) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_dup 0) + (reg:HI 24))]) + +(define_insn_and_split "*parityqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (parity:HI (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:HI 24))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:QI 24) + (match_dup 1)) + (set (reg:HI 24) + (zero_extend:HI (parity:QI (reg:QI 24)))) + (set (match_dup 0) + (reg:HI 24))]) + +(define_expand "paritysi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (truncate:HI (parity:SI (reg:SI 22)))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (zero_extend:HI (parity:QI (reg:QI 24))))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*paritysihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (parity:SI (reg:SI 22))))] + "" + "%~call __paritysi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Popcount + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (truncate:HI (popcount:SI (reg:SI 22)))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24)))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (popcount:SI (reg:SI 22))))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (zero_extend:HI (popcount:QI (reg:QI 24))))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))]) 
+ +;; Count Leading Zeros + +(define_expand "clzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "clzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (clz:SI (reg:SI 22)))) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*clzhi2.libgcc" + [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __clzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*clzsihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (clz:SI (reg:SI 22)))) + (clobber (reg:QI 26))] + "" + "%~call __clzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Count Trailing Zeros + +(define_expand "ctzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "ctzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (ctz:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ctzhi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ctzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ctzsihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (ctz:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ctzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Find First Set + +(define_expand "ffshi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "ffssi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (ffs:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ffshi2.libgcc" + [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ffshi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ffssihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (ffs:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ffssi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Copysign + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "r")] + UNSPEC_COPYSIGN))] + "" + "bst %D2,7\;bld %D0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Swap Bytes (change byte-endianess) + +(define_expand "bswapsi2" + [(set (reg:SI 22) + (match_operand:SI 1 
"register_operand" "")) + (set (reg:SI 22) + (bswap:SI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (reg:SI 22))]) + +(define_insn "*bswapsi2.libgcc" + [(set (reg:SI 22) + (bswap:SI (reg:SI 22)))] + "" + "%~call __bswapsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; CPU instructions + +;; NOP taking 1 or 2 Ticks +(define_expand "nopv" + [(parallel [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] + UNSPECV_NOP) + (set (match_dup 1) + (unspec_volatile:BLK [(match_dup 1)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; + }) + +(define_insn "*nopv" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "P,K")] + UNSPECV_NOP) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))] + "" + "@ + nop + rjmp ." + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; SLEEP +(define_expand "sleep" + [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_SLEEP) + (set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + }) + +(define_insn "*sleep" + [(unspec_volatile [(const_int 0)] UNSPECV_SLEEP) + (set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMORY_BARRIER))] + "" + "sleep" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; WDR +(define_expand "wdr" + [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_WDR) + (set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + }) + +(define_insn "*wdr" + [(unspec_volatile [(const_int 0)] UNSPECV_WDR) + (set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMORY_BARRIER))] + "" + "wdr" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; FMUL +(define_expand "fmul" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMUL)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmul_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmul_insn" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMUL))] + "AVR_HAVE_MUL" + "fmul %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmul.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMUL)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmul" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; FMULS +(define_expand "fmuls" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULS)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmuls_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmuls_insn" + [(set 
(match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMULS))] + "AVR_HAVE_MUL" + "fmuls %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmuls.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULS)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmuls" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; FMULSU +(define_expand "fmulsu" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULSU)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmulsu_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmulsu_insn" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMULSU))] + "AVR_HAVE_MUL" + "fmulsu %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmulsu.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULSU)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmulsu" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Some combiner patterns dealing with bits. +;; See PR42210 + +;; Move bit $3.0 into bit $0.$4 +(define_insn "*movbitqi.1-6.a" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (and:QI (ashift:QI (match_operand:QI 3 "register_operand" "r") + (match_operand:QI 4 "const_0_to_7_operand" "n")) + (match_operand:QI 5 "single_one_operand" "n"))))] + "INTVAL(operands[4]) == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode)) + && INTVAL(operands[4]) == exact_log2 (INTVAL(operands[5]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,%4" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $3.0 into bit $0.$4 +;; Variation of above. Unfortunately, there is no canonicalized representation +;; of moving around bits. So what we see here depends on how user writes down +;; bit manipulations. +(define_insn "*movbitqi.1-6.b" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (ashift:QI (and:QI (match_operand:QI 3 "register_operand" "r") + (const_int 1)) + (match_operand:QI 4 "const_0_to_7_operand" "n"))))] + "INTVAL(operands[4]) == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,%4" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $3.0 into bit $0.0. +;; For bit 0, combiner generates slightly different pattern. +(define_insn "*movbitqi.0" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (and:QI (match_operand:QI 3 "register_operand" "r") + (const_int 1))))] + "0 == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,0" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $2.0 into bit $0.7. 
+;; For bit 7, combiner generates slightly different pattern +(define_insn "*movbitqi.7" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (const_int 127)) + (ashift:QI (match_operand:QI 2 "register_operand" "r") + (const_int 7))))] + "" + "bst %2,0\;bld %0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Combiner transforms above four pattern into ZERO_EXTRACT if it sees MEM +;; and input/output match. We provide a special pattern for this, because +;; in contrast to a IN/BST/BLD/OUT sequence we need less registers and the +;; operation on I/O is atomic. +(define_insn "*insv.io" + [(set (zero_extract:QI (mem:QI (match_operand 0 "low_io_address_operand" "n,n,n")) + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n,n,n")) + (match_operand:QI 2 "nonmemory_operand" "L,P,r"))] + "" + "@ + cbi %i0,%1 + sbi %i0,%1 + sbrc %2,0\;sbi %i0,%1\;sbrs %2,0\;cbi %i0,%1" + [(set_attr "length" "1,1,4") + (set_attr "cc" "none")]) + +(define_insn "*insv.not.io" + [(set (zero_extract:QI (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n")) + (not:QI (match_operand:QI 2 "register_operand" "r")))] + "" + "sbrs %2,0\;sbi %i0,%1\;sbrc %2,0\;cbi %i0,%1" + [(set_attr "length" "4") + (set_attr "cc" "none")]) + +;; The insv expander. +;; We only support 1-bit inserts +(define_expand "insv" + [(set (zero_extract:QI (match_operand:QI 0 "register_operand" "") + (match_operand:QI 1 "const1_operand" "") ; width + (match_operand:QI 2 "const_0_to_7_operand" "")) ; pos + (match_operand:QI 3 "nonmemory_operand" ""))] + "optimize") + +;; Insert bit $2.0 into $0.$1 +(define_insn "*insv.reg" + [(set (zero_extract:QI (match_operand:QI 0 "register_operand" "+r,d,d,l,l") + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n,n,n,n,n")) + (match_operand:QI 2 "nonmemory_operand" "r,L,P,L,P"))] + "" + "@ + bst %2,0\;bld %0,%1 + andi %0,lo8(~(1<<%1)) + ori %0,lo8(1<<%1) + clt\;bld %0,%1 + set\;bld %0,%1" + [(set_attr "length" "2,1,1,2,2") + (set_attr "cc" "none,set_zn,set_zn,none,none")]) + + +;; Some combine patterns that try to fix bad code when a value is composed +;; from byte parts like in PR27663. +;; The patterns give some release but the code still is not optimal, +;; in particular when subreg lowering (-fsplit-wide-types) is turned on. +;; That switch obfuscates things here and in many other places. 
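+;;
+;; Roughly the kind of source these combine patterns are aimed at
+;; (illustrative only):
+;;
+;;     unsigned int join (unsigned char lo, unsigned char hi)
+;;     {
+;;         return lo | ((unsigned int) hi << 8);
+;;     }
+;;
+;; With the splitters below, each byte is IOR-ed/XOR-ed directly into the
+;; matching subreg byte of the destination instead of being widened and
+;; shifted in full 16-bit registers first.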
+ +;; "*iorhiqi.byte0" "*iorpsiqi.byte0" "*iorsiqi.byte0" +;; "*xorhiqi.byte0" "*xorpsiqi.byte0" "*xorsiqi.byte0" +(define_insn_and_split "*qi.byte0" + [(set (match_operand:HISI 0 "register_operand" "=r") + (xior:HISI + (zero_extend:HISI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HISI 2 "register_operand" "0")))] + "" + "#" + "reload_completed" + [(set (match_dup 3) + (xior:QI (match_dup 3) + (match_dup 1)))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], mode, 0); + }) + +;; "*iorhiqi.byte1-3" "*iorpsiqi.byte1-3" "*iorsiqi.byte1-3" +;; "*xorhiqi.byte1-3" "*xorpsiqi.byte1-3" "*xorsiqi.byte1-3" +(define_insn_and_split "*qi.byte1-3" + [(set (match_operand:HISI 0 "register_operand" "=r") + (xior:HISI + (ashift:HISI (zero_extend:HISI (match_operand:QI 1 "register_operand" "r")) + (match_operand:QI 2 "const_8_16_24_operand" "n")) + (match_operand:HISI 3 "register_operand" "0")))] + "INTVAL(operands[2]) < GET_MODE_BITSIZE (mode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (xior:QI (match_dup 4) + (match_dup 1)))] + { + int byteno = INTVAL(operands[2]) / BITS_PER_UNIT; + operands[4] = simplify_gen_subreg (QImode, operands[0], mode, byteno); + }) + +(define_expand "extzv" + [(set (match_operand:QI 0 "register_operand" "") + (zero_extract:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "const1_operand" "") + (match_operand:QI 3 "const_0_to_7_operand" "")))]) + +(define_insn "*extzv" + [(set (match_operand:QI 0 "register_operand" "=*d,*d,*d,*d,r") + (zero_extract:QI (match_operand:QI 1 "register_operand" "0,r,0,0,r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "L,L,P,C04,n")))] + "" + "@ + andi %0,1 + mov %0,%1\;andi %0,1 + lsr %0\;andi %0,1 + swap %0\;andi %0,1 + bst %1,%2\;clr %0\;bld %0,0" + [(set_attr "length" "1,2,2,2,3") + (set_attr "cc" "set_zn,set_zn,set_zn,set_zn,clobber")]) + +(define_insn_and_split "*extzv.qihi1" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extract:HI (match_operand:QI 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n")))] + "" + "#" + "" + [(set (match_dup 3) + (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2))) + (set (match_dup 4) + (const_int 0))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + operands[4] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + }) + +(define_insn_and_split "*extzv.qihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI + (zero_extract:QI (match_operand:QI 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n"))))] + "" + "#" + "" + [(set (match_dup 3) + (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2))) + (set (match_dup 4) + (const_int 0))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + operands[4] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + }) + + +;; Fixed-point instructions +(include "avr-fixed.md") + +;; Operations on 64-bit registers +(include "avr-dimode.md") diff --git a/gcc-4.9/gcc/config/avr/avr.opt b/gcc-4.9/gcc/config/avr/avr.opt new file mode 100644 index 000000000..5be80aa2d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.opt @@ -0,0 +1,84 @@ +; Options for the ATMEL AVR port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mcall-prologues +Target Report Mask(CALL_PROLOGUES) +Use subroutines for function prologues and epilogues + +mmcu= +Target RejectNegative Joined Var(avr_mcu_index) Init(0) Enum(avr_mcu) +-mmcu=MCU Select the target MCU + +mdeb +Target Report Undocumented Mask(ALL_DEBUG) + +mlog= +Target RejectNegative Joined Undocumented Var(avr_log_details) + +mint8 +Target Report Mask(INT8) +Use an 8-bit 'int' type + +mno-interrupts +Target Report RejectNegative Mask(NO_INTERRUPTS) +Change the stack pointer without disabling interrupts + +mbranch-cost= +Target Report Joined RejectNegative UInteger Var(avr_branch_cost) Init(0) +Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 0. + +morder1 +Target Report Undocumented Mask(ORDER_1) + +morder2 +Target Report Undocumented Mask(ORDER_2) + +mtiny-stack +Target Report Mask(TINY_STACK) +Change only the low 8 bits of the stack pointer + +mrelax +Target Report +Relax branches + +mpmem-wrap-around +Target Report +Make the linker relaxation machine assume that a program counter wrap-around occurs. + +maccumulate-args +Target Report Mask(ACCUMULATE_OUTGOING_ARGS) +Accumulate outgoing function arguments and acquire/release the needed stack space for outpoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf. + +mstrict-X +Target Report Var(avr_strict_X) Init(0) +When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X. + +;; For rationale behind -msp8 see explanation in avr.h. +msp8 +Target Report RejectNegative Var(avr_sp8) Init(0) +The device has no SPH special function register. This option will be overridden by the compiler driver with the correct setting if presence/absence of SPH can be deduced from -mmcu=MCU. + +Waddr-space-convert +Warning C Report Var(avr_warn_addr_space_convert) Init(0) +Warn if the address space of an address is changed. + +mfract-convert-truncate +Target Report Mask(FRACT_CONV_TRUNC) +Allow to use truncation instead of rounding towards 0 for fractional int types diff --git a/gcc-4.9/gcc/config/avr/avrlibc.h b/gcc-4.9/gcc/config/avr/avrlibc.h new file mode 100644 index 000000000..fee685b6a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avrlibc.h @@ -0,0 +1,30 @@ +/* Definitions of target machine for the GNU compiler collection + for Atmel AVR micro controller if configured for AVR-Libc. + Copyright (C) 2012-2014 Free Software Foundation, Inc. 
+ Contributed by Georg-Johann Lay (avr@gjlay.de) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* AVR-Libc implements functions from libgcc.a in libm.a, see PR54461. */ + +#undef LIBGCC_SPEC +#define LIBGCC_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc -lm }}}}}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "--start-group %G %L --end-group" diff --git a/gcc-4.9/gcc/config/avr/builtins.def b/gcc-4.9/gcc/config/avr/builtins.def new file mode 100644 index 000000000..affcbaa34 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/builtins.def @@ -0,0 +1,169 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This file contains the definitions and documentation for the + builtins defined in the AVR part of the GNU compiler. + Befor including this file, define a macro + + DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME) + + NAME: `__builtin_avr_name' will be the user-level name of the builtin. + `AVR_BUILTIN_NAME' will be the internal builtin's id. + N_ARGS: Number of input arguments. If special treatment is needed, + set to -1 and handle it by hand, see avr.c:avr_expand_builtin(). + TYPE: A tree node describing the prototype of the built-in. + ICODE: Name of attached insn or expander. If special treatment in avr.c + is needed to expand the built-in, use `nothing'. + LIBNAME: Name of the attached implementation in libgcc which is used if + the builtin cannot be folded away and there is no insn. */ + +/* Mapped to respective instruction. */ + +DEF_BUILTIN (NOP, -1, void_ftype_void, nothing, NULL) +DEF_BUILTIN (SEI, 0, void_ftype_void, enable_interrupt, NULL) +DEF_BUILTIN (CLI, 0, void_ftype_void, disable_interrupt, NULL) +DEF_BUILTIN (WDR, 0, void_ftype_void, wdr, NULL) +DEF_BUILTIN (SLEEP, 0, void_ftype_void, sleep, NULL) + +/* Mapped to respective instruction but might also be folded away + or emit as libgcc call if ISA does not provide the instruction. */ + +DEF_BUILTIN (SWAP, 1, uchar_ftype_uchar, rotlqi3_4, NULL) +DEF_BUILTIN (FMUL, 2, uint_ftype_uchar_uchar, fmul, NULL) +DEF_BUILTIN (FMULS, 2, int_ftype_char_char, fmuls, NULL) +DEF_BUILTIN (FMULSU, 2, int_ftype_char_uchar, fmulsu, NULL) + +/* More complex stuff that cannot be mapped 1:1 to an instruction. 
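+
+   Editorial sketch, not part of the original header: like other GCC
+   builtin .def files, this one is meant to be consumed with the usual
+   X-macro pattern.  avr.c is assumed to do roughly
+
+     #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME)  \
+       AVR_BUILTIN_##NAME,
+     enum avr_builtin_id
+     {
+       #include "builtins.def"
+       AVR_BUILTIN_COUNT
+     };
+     #undef DEF_BUILTIN
+
+   to enumerate the builtin ids, and presumably expands the list a second
+   time with a different DEF_BUILTIN body to register each builtin; entries
+   with N_ARGS == -1 are then expanded by hand in avr_expand_builtin, as
+   noted above.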
*/ + +DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_ulong, nothing, NULL) +DEF_BUILTIN (INSERT_BITS, 3, uchar_ftype_ulong_uchar_uchar, insert_bits, NULL) +DEF_BUILTIN (FLASH_SEGMENT, 1, char_ftype_const_memx_ptr, flash_segment, NULL) + +/* ISO/IEC TR 18037 "Embedded C" + The following builtins are undocumented and used by stdfix.h. */ + +/* 7.18a.6 The fixed-point intrinsic functions. */ + +/* 7.18a.6.2 The fixed-point absolute value functions. */ + +DEF_BUILTIN (ABSHR, 1, hr_ftype_hr, ssabsqq2, "__ssabs_1") +DEF_BUILTIN (ABSR, 1, nr_ftype_nr, ssabshq2, "__ssabs_2") +DEF_BUILTIN (ABSLR, 1, lr_ftype_lr, ssabssq2, "__ssabs_4") +DEF_BUILTIN (ABSLLR, -1, llr_ftype_llr, nothing, "__ssabsdq2") // GCC extension + +DEF_BUILTIN (ABSHK, 1, hk_ftype_hk, ssabsha2, "__ssabs_2") +DEF_BUILTIN (ABSK, 1, nk_ftype_nk, ssabssa2, "__ssabs_4") +DEF_BUILTIN (ABSLK, -1, lk_ftype_lk, nothing, "__ssabsda2") +DEF_BUILTIN (ABSLLK, -1, llk_ftype_llk, nothing, "__ssabsta2") // GCC extension + +/* 7.18a.6.3 The fixed-point round functions. */ + +DEF_BUILTIN (ROUNDHR, 2, hr_ftype_hr_int, roundqq3, "__roundhr") +DEF_BUILTIN (ROUNDR, 2, nr_ftype_nr_int, roundhq3, "__roundr") +DEF_BUILTIN (ROUNDLR, 2, lr_ftype_lr_int, roundsq3, "__roundlr") +DEF_BUILTIN (ROUNDLLR, -1, llr_ftype_llr_int, nothing, "__rounddq3") // GCC extension + +DEF_BUILTIN (ROUNDUHR, 2, uhr_ftype_uhr_int, rounduqq3, "__rounduhr") +DEF_BUILTIN (ROUNDUR, 2, unr_ftype_unr_int, rounduhq3, "__roundur") +DEF_BUILTIN (ROUNDULR, 2, ulr_ftype_ulr_int, roundusq3, "__roundulr") +DEF_BUILTIN (ROUNDULLR, -1, ullr_ftype_ullr_int, nothing, "__roundudq3") // GCC extension + +DEF_BUILTIN (ROUNDHK, 2, hk_ftype_hk_int, roundha3, "__roundhk") +DEF_BUILTIN (ROUNDK, 2, nk_ftype_nk_int, roundsa3, "__roundk") +DEF_BUILTIN (ROUNDLK, -1, lk_ftype_lk_int, nothing, "__roundda3") +DEF_BUILTIN (ROUNDLLK, -1, llk_ftype_llk_int, nothing, "__roundta3") // GCC extension + +DEF_BUILTIN (ROUNDUHK, 2, uhk_ftype_uhk_int, rounduha3, "__rounduhk") +DEF_BUILTIN (ROUNDUK, 2, unk_ftype_unk_int, roundusa3, "__rounduk") +DEF_BUILTIN (ROUNDULK, -1, ulk_ftype_ulk_int, nothing, "__rounduda3") +DEF_BUILTIN (ROUNDULLK, -1, ullk_ftype_ullk_int, nothing, "__rounduta3") // GCC extension + +/* 7.18a.6.4 The fixed-point bit countls functions. */ + +DEF_BUILTIN (COUNTLSHR, -1, int_ftype_hr, nothing, "__countlsqi2") +DEF_BUILTIN (COUNTLSR, -1, int_ftype_nr, nothing, "__countlshi2") +DEF_BUILTIN (COUNTLSLR, -1, int_ftype_lr, nothing, "__countlssi2") +DEF_BUILTIN (COUNTLSLLR, -1, int_ftype_llr, nothing, "__countlsdi2") // GCC extension + +DEF_BUILTIN (COUNTLSUHR, -1, int_ftype_uhr, nothing, "__countlsuqi2") +DEF_BUILTIN (COUNTLSUR, -1, int_ftype_unr, nothing, "__countlsuhi2") +DEF_BUILTIN (COUNTLSULR, -1, int_ftype_ulr, nothing, "__countlsusi2") +DEF_BUILTIN (COUNTLSULLR, -1, int_ftype_ullr, nothing, "__countlsudi2") // GCC extension + +DEF_BUILTIN (COUNTLSHK, -1, int_ftype_hk, nothing, "__countlshi2") +DEF_BUILTIN (COUNTLSK, -1, int_ftype_nk, nothing, "__countlssi2") +DEF_BUILTIN (COUNTLSLK, -1, int_ftype_lk, nothing, "__countlsdi2") +DEF_BUILTIN (COUNTLSLLK, -1, int_ftype_llk, nothing, "__countlsdi2") // GCC extension + +DEF_BUILTIN (COUNTLSUHK, -1, int_ftype_uhk, nothing, "__countlsuhi2") +DEF_BUILTIN (COUNTLSUK, -1, int_ftype_unk, nothing, "__countlsusi2") +DEF_BUILTIN (COUNTLSULK, -1, int_ftype_ulk, nothing, "__countlsudi2") +DEF_BUILTIN (COUNTLSULLK, -1, int_ftype_ullk, nothing, "__countlsudi2") // GCC extension + +/* 7.18a.6.5 The bitwise fixed-point to integer conversion functions. 
*/ + +DEF_BUILTIN (BITSHR, -1, inthr_ftype_hr, nothing, "__ret") +DEF_BUILTIN (BITSR, -1, intnr_ftype_nr, nothing, "__ret") +DEF_BUILTIN (BITSLR, -1, intlr_ftype_lr, nothing, "__ret") +DEF_BUILTIN (BITSLLR, -1, intllr_ftype_llr, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSUHR, -1, intuhr_ftype_uhr, nothing, "__ret") +DEF_BUILTIN (BITSUR, -1, intunr_ftype_unr, nothing, "__ret") +DEF_BUILTIN (BITSULR, -1, intulr_ftype_ulr, nothing, "__ret") +DEF_BUILTIN (BITSULLR, -1, intullr_ftype_ullr, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSHK, -1, inthk_ftype_hk, nothing, "__ret") +DEF_BUILTIN (BITSK, -1, intnk_ftype_nk, nothing, "__ret") +DEF_BUILTIN (BITSLK, -1, intlk_ftype_lk, nothing, "__ret") +DEF_BUILTIN (BITSLLK, -1, intllk_ftype_llk, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSUHK, -1, intuhk_ftype_uhk, nothing, "__ret") +DEF_BUILTIN (BITSUK, -1, intunk_ftype_unk, nothing, "__ret") +DEF_BUILTIN (BITSULK, -1, intulk_ftype_ulk, nothing, "__ret") +DEF_BUILTIN (BITSULLK, -1, intullk_ftype_ullk, nothing, "__ret") // GCC extension + + +/* 7.18a.6.6 The bitwise integer to fixed-point conversion functions. */ + +DEF_BUILTIN ( HRBITS, -1, hr_ftype_inthr, nothing, "__ret") +DEF_BUILTIN ( RBITS, -1, nr_ftype_intnr, nothing, "__ret") +DEF_BUILTIN ( LRBITS, -1, lr_ftype_intlr, nothing, "__ret") +DEF_BUILTIN ( LLRBITS, -1, llr_ftype_intllr, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( UHRBITS, -1, uhr_ftype_intuhr, nothing, "__ret") +DEF_BUILTIN ( URBITS, -1, unr_ftype_intunr, nothing, "__ret") +DEF_BUILTIN ( ULRBITS, -1, ulr_ftype_intulr, nothing, "__ret") +DEF_BUILTIN (ULLRBITS, -1, ullr_ftype_intullr, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( HKBITS, -1, hk_ftype_inthk, nothing, "__ret") +DEF_BUILTIN ( KBITS, -1, nk_ftype_intnk, nothing, "__ret") +DEF_BUILTIN ( LKBITS, -1, lk_ftype_intlk, nothing, "__ret") +DEF_BUILTIN ( LLKBITS, -1, llk_ftype_intllk, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( UHKBITS, -1, uhk_ftype_intuhk, nothing, "__ret") +DEF_BUILTIN ( UKBITS, -1, unk_ftype_intunk, nothing, "__ret") +DEF_BUILTIN ( ULKBITS, -1, ulk_ftype_intulk, nothing, "__ret") +DEF_BUILTIN (ULLKBITS, -1, ullk_ftype_intullk, nothing, "__ret") // GCC extension + +/* Overloaded */ + +/* 7.18a.6.7 Type-generic fixed-point functions. */ + +DEF_BUILTIN (ABSFX, -1, void_ftype_void /* dummy */, nothing, NULL) +DEF_BUILTIN (ROUNDFX, -1, void_ftype_void /* dummy */, nothing, NULL) +DEF_BUILTIN (COUNTLSFX, -1, void_ftype_void /* dummy */, nothing, NULL) diff --git a/gcc-4.9/gcc/config/avr/constraints.md b/gcc-4.9/gcc/config/avr/constraints.md new file mode 100644 index 000000000..2f6e4ea1b --- /dev/null +++ b/gcc-4.9/gcc/config/avr/constraints.md @@ -0,0 +1,238 @@ +;; Constraint definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
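+
+;; Editorial note, not part of the original file: the constraint letters
+;; defined below are referenced from the operand constraint strings of the
+;; insns in avr.md.  A hypothetical pattern that only accepts an upper
+;; register and a 6-bit constant could, for instance, be written roughly as
+;;
+;;   (define_insn "*example"
+;;     [(set (match_operand:QI 0 "register_operand" "=d")
+;;           (plus:QI (match_operand:QI 1 "register_operand" "0")
+;;                    (match_operand:QI 2 "const_int_operand" "I")))]
+;;     ...)
+;;
+;; where "d" selects LD_REGS (r16--r31) and "I" the range 0..63, as
+;; defined below.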
+ +;; Register constraints + +(define_register_constraint "t" "R0_REG" + "Temporary register r0") + +(define_register_constraint "b" "BASE_POINTER_REGS" + "Base pointer registers (r28--r31)") + +(define_register_constraint "e" "POINTER_REGS" + "Pointer registers (r26--r31)") + +(define_register_constraint "w" "ADDW_REGS" + "Registers from r24 to r31. These registers + can be used in @samp{adiw} command.") + +(define_register_constraint "d" "LD_REGS" + "Registers from r16 to r31.") + +(define_register_constraint "l" "NO_LD_REGS" + "Registers from r0 to r15.") + +(define_register_constraint "a" "SIMPLE_LD_REGS" + "Registers from r16 to r23.") + +(define_register_constraint "x" "POINTER_X_REGS" + "Register pair X (r27:r26).") + +(define_register_constraint "y" "POINTER_Y_REGS" + "Register pair Y (r29:r28).") + +(define_register_constraint "z" "POINTER_Z_REGS" + "Register pair Z (r31:r30).") + +(define_register_constraint "q" "STACK_REG" + "Stack pointer register (SPH:SPL).") + +(define_constraint "I" + "Integer constant in the range 0 @dots{} 63." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 63"))) + +(define_constraint "J" + "Integer constant in the range -63 @dots{} 0." + (and (match_code "const_int") + (match_test "ival <= 0 && ival >= -63"))) + +(define_constraint "K" + "Integer constant 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "L" + "Zero." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "M" + "Integer constant in the range 0 @dots{} 0xff." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 0xff"))) + +(define_constraint "N" + "Constant integer @minus{}1." + (and (match_code "const_int") + (match_test "ival == -1"))) + +(define_constraint "O" + "Constant integer 8, 16, or 24." + (and (match_code "const_int") + (match_test "ival == 8 || ival == 16 || ival == 24"))) + +(define_constraint "P" + "Constant integer 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "G" + "Constant float 0." + (and (match_code "const_double") + (match_test "op == CONST0_RTX (SFmode)"))) + +(define_memory_constraint "Q" + "A memory address based on Y or Z pointer with displacement." + (and (match_code "mem") + (match_test "extra_constraint_Q (op)"))) + +(define_constraint "Cm2" + "Constant integer @minus{}2." + (and (match_code "const_int") + (match_test "ival == -2"))) + +(define_constraint "C03" + "Constant integer 3." + (and (match_code "const_int") + (match_test "ival == 3"))) + +(define_constraint "C04" + "Constant integer 4." + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "C05" + "Constant integer 5." + (and (match_code "const_int") + (match_test "ival == 5"))) + +(define_constraint "C06" + "Constant integer 6." + (and (match_code "const_int") + (match_test "ival == 6"))) + +(define_constraint "C07" + "Constant integer 7." + (and (match_code "const_int") + (match_test "ival == 7"))) + +(define_constraint "Ca2" + "Constant 2-byte integer that allows AND without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Ca3" + "Constant 3-byte integer that allows AND without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Ca4" + "Constant 4-byte integer that allows AND without clobber register." 
+ (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Co2" + "Constant 2-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Co3" + "Constant 3-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Co4" + "Constant 4-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Cx2" + "Constant 2-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<8))"))) + +(define_constraint "Cx3" + "Constant 3-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<8))"))) + +(define_constraint "Cx4" + "Constant 4-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<8))"))) + +(define_constraint "Csp" + "Integer constant in the range -6 @dots{} 6." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -6, 6)"))) + +(define_constraint "Cxf" + "32-bit integer constant where at least one nibble is 0xf." + (and (match_code "const_int") + (match_test "avr_has_nibble_0xf (op)"))) + +(define_constraint "C0f" + "32-bit integer constant where no nibble equals 0xf." + (and (match_code "const_int") + (match_test "!avr_has_nibble_0xf (op)"))) + +;; CONST_FIXED is no element of 'n' so cook our own. +;; "i" or "s" would match but because the insn uses iterators that cover +;; INT_MODE, "i" or "s" is not always possible. + +(define_constraint "Ynn" + "Fixed-point constant known at compile time." + (match_code "const_fixed")) + +(define_constraint "Y00" + "Fixed-point or integer constant with bit representation 0x0" + (and (match_code "const_fixed,const_int") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_constraint "Y01" + "Fixed-point or integer constant with bit representation 0x1" + (ior (and (match_code "const_fixed") + (match_test "1 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_P (op)"))) + +(define_constraint "Ym1" + "Fixed-point or integer constant with bit representation -0x1" + (ior (and (match_code "const_fixed") + (match_test "-1 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_N (op)"))) + +(define_constraint "Y02" + "Fixed-point or integer constant with bit representation 0x2" + (ior (and (match_code "const_fixed") + (match_test "2 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_K (op)"))) + +(define_constraint "Ym2" + "Fixed-point or integer constant with bit representation -0x2" + (ior (and (match_code "const_fixed") + (match_test "-2 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_Cm2 (op)"))) + +;; Similar to "IJ" used with ADIW/SBIW, but for CONST_FIXED. + +(define_constraint "YIJ" + "Fixed-point constant from @minus{}0x003f to 0x003f." 
+ (and (match_code "const_fixed") + (match_test "IN_RANGE (INTVAL (avr_to_int_mode (op)), -63, 63)"))) diff --git a/gcc-4.9/gcc/config/avr/driver-avr.c b/gcc-4.9/gcc/config/avr/driver-avr.c new file mode 100644 index 000000000..cb5dd1d1d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/driver-avr.c @@ -0,0 +1,150 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Current architecture. */ +const avr_arch_t *avr_current_arch = NULL; + +/* Current device. */ +const avr_mcu_t *avr_current_device = NULL; + +/* Initialize avr_current_arch and avr_current_device variables. */ + +static void +avr_set_current_device (const char *name) +{ + + if (NULL != avr_current_arch) + return; + + for (avr_current_device = avr_mcu_types; avr_current_device->name; + avr_current_device++) + { + if (strcmp (avr_current_device->name, name) == 0) + break; + } + + avr_current_arch = &avr_arch_types[avr_current_device->arch]; +} + +/* Returns command line parameters to pass to as. */ + +const char* +avr_device_to_as (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-mmcu=", avr_current_arch->arch_name, + avr_current_device->errata_skip ? "" : " -mno-skip-bug", + NULL); +} + +/* Returns command line parameters to pass to ld. */ + +const char* +avr_device_to_ld (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-m ", avr_current_arch->arch_name, NULL); +} + +/* Returns command line parameters that describe start of date section. */ + +const char * +avr_device_to_data_start (int argc, const char **argv) +{ + unsigned long data_section_start; + char data_section_start_str[16]; + + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + if (avr_current_device->data_section_start + == avr_current_arch->default_data_section_start) + return NULL; + + data_section_start = 0x800000 + avr_current_device->data_section_start; + + snprintf (data_section_start_str, sizeof(data_section_start_str) - 1, + "0x%lX", data_section_start); + + return concat ("-Tdata ", data_section_start_str, NULL); +} + +/* Returns command line parameters that describe the device startfile. */ + +const char * +avr_device_to_startfiles (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("crt", avr_current_device->library_name, ".o%s", NULL); +} + +/* Returns command line parameters that describe the device library. 
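+
+   (Editorial note, not part of the original source: these avr_device_to_*
+   helpers are assumed to be registered with the driver through
+   EXTRA_SPEC_FUNCTIONS in avr.h and called from the driver specs roughly
+   as "%:device_to_devicelib(%{mmcu=*:%*})", which is how the user's
+   -mmcu= value ends up here in argv[0].)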
*/ + +const char * +avr_device_to_devicelib (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-l", avr_current_device->library_name, NULL); +} + +const char* +avr_device_to_sp8 (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + /* Leave "avr2" and "avr25" alone. These two architectures are + the only ones that mix devices with 8-bit SP and 16-bit SP. + -msp8 is set by mmultilib machinery. */ + + if (avr_current_device->macro == NULL + && (avr_current_device->arch == ARCH_AVR2 + || avr_current_device->arch == ARCH_AVR25)) + return ""; + + return avr_current_device->short_sp + ? "-msp8" + : "%. */ + + +/* Overriding some definitions from elfos.h for AVR. */ + +#undef PCC_BITFIELD_TYPE_MATTERS + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (32768 * 8) + +#undef STRING_LIMIT +#define STRING_LIMIT ((unsigned) 64) + +/* Output alignment 2**1 for jump tables. */ +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN (FILE, 1); + +/* Be conservative in crtstuff.c. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP diff --git a/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c b/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c new file mode 100644 index 000000000..ea3e6f1ba --- /dev/null +++ b/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c @@ -0,0 +1,144 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include +#include + +#define IN_GEN_AVR_MMCU_TEXI + +#include "avr-arch.h" +#include "avr-devices.c" + +static const char* +mcu_name[sizeof avr_mcu_types / sizeof avr_mcu_types[0]]; + +static int letter (char c) +{ + return c >= 'a' && c <= 'z'; +} + +static int digit (char c) +{ + return c >= '0' && c <= '9'; +} + +static int +comparator (const void *va, const void *vb) +{ + const char *a = *(const char* const*) va; + const char *b = *(const char* const*) vb; + + while (*a && *b) + { + /* Make letters smaller than digits so that `atmega16a' follows + `atmega16' without `atmega161' etc. between them. */ + + if (letter (*a) && digit (*b)) + return -1; + + if (digit (*a) && letter (*b)) + return 1; + + if (*a != *b) + return *a - *b; + + a++; + b++; + } + + return *a - *b; +} + +static void +print_mcus (size_t n_mcus) +{ + int duplicate = 0; + size_t i; + + if (!n_mcus) + return; + + qsort (mcu_name, n_mcus, sizeof (char*), comparator); + + printf ("@*@var{mcu}@tie{}="); + + for (i = 0; i < n_mcus; i++) + { + printf (" @code{%s}%s", mcu_name[i], i == n_mcus-1 ? ".\n\n" : ","); + + if (i && !strcmp (mcu_name[i], mcu_name[i-1])) + { + /* Sanity-check: Fail on devices that are present more than once. 
*/ + + duplicate = 1; + fprintf (stderr, "error: duplicate device: %s\n", mcu_name[i]); + } + } + + if (duplicate) + exit (1); +} + +int main (void) +{ + enum avr_arch arch = ARCH_UNKNOWN; + size_t i, n_mcus = 0; + const avr_mcu_t *mcu; + + printf ("@c Copyright (C) 2012-2014 Free Software Foundation, Inc.\n"); + printf ("@c This is part of the GCC manual.\n"); + printf ("@c For copying conditions, see the file " + "gcc/doc/include/fdl.texi.\n\n"); + + printf ("@c This file is generated automatically using\n"); + printf ("@c gcc/config/avr/gen-avr-mmcu-texi.c from:\n"); + printf ("@c gcc/config/avr/avr-arch.h\n"); + printf ("@c gcc/config/avr/avr-devices.c\n"); + printf ("@c gcc/config/avr/avr-mcus.def\n\n"); + + printf ("@c Please do not edit manually.\n\n"); + + printf ("@table @code\n\n"); + + for (mcu = avr_mcu_types; mcu->name; mcu++) + { + if (mcu->macro == NULL) + { + arch = mcu->arch; + + /* Start a new architecture: Flush the MCUs collected so far. */ + + print_mcus (n_mcus); + n_mcus = 0; + + for (i = 0; i < sizeof (avr_texinfo) / sizeof (*avr_texinfo); i++) + if (arch == avr_texinfo[i].arch) + printf ("@item %s\n%s\n", mcu->name, avr_texinfo[i].texinfo); + } + else if (arch == (enum avr_arch) mcu->arch) + { + mcu_name[n_mcus++] = mcu->name; + } + } + + print_mcus (n_mcus); + printf ("@end table\n"); + + return EXIT_SUCCESS; +} diff --git a/gcc-4.9/gcc/config/avr/genmultilib.awk b/gcc-4.9/gcc/config/avr/genmultilib.awk new file mode 100644 index 000000000..90e5e5cfd --- /dev/null +++ b/gcc-4.9/gcc/config/avr/genmultilib.awk @@ -0,0 +1,216 @@ +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +################################################################## +# +# Transform Core/Device Information from avr-mcus.def to a +# Representation that is understood by GCC's multilib Machinery. +# +# The Script works as a Filter from STDIN to STDOUT. +# +# FORMAT = "Makefile": Generate Makefile Snipet that sets some +# MULTILIB_* Variables as needed. +# +################################################################## + +BEGIN { + FS ="[(, \t]+" + option[""] = "" + tiny_stack[""] = 1 + comment = 1 + n_mcu = 0 + n_cores = 0 + + mtiny[0] = "" + mtiny[1] = "tiny-stack" + option["tiny-stack"] = "msp8" +} + +################################################################## +# Add some Comments to the generated Files and copy-paste +# Copyright Notice from above. 
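+
+# Editorial illustration, not part of the original script: for a device
+# entry in avr-mcus.def such as
+#
+#   AVR_MCU ("atmega8", ARCH_AVR4, ...)
+#
+# the END block below emits the line
+#
+#   mmcu?avr4=mmcu?atmega8
+#
+# into MULTILIB_MATCHES, as can be seen in the generated t-multilib later
+# in this patch.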
+################################################################## + +/^#/ { + if (!comment) + next + else if (comment == 1) + { + if (FORMAT == "Makefile") + { + print "# Auto-generated Makefile Snip" + print "# Generated by : ./gcc/config/avr/genmultilib.awk" + print "# Generated from : ./gcc/config/avr/avr-mcus.def" + print "# Used by : tmake_file from Makefile and genmultilib" + print "" + } + } + + comment = 2; + + print +} + +/^$/ { + # The first empty line stops copy-pasting the GPL comments + # from this file to the generated file. + + comment = 0 +} + +################################################################## +# Run over all AVR_MCU Lines and gather Information: +# cores[] : Enumerates the Cores (avr2, avr25, ...) +# mcu[] : Enumerates the Devices +# tiny_stack[]: Maps Core/Device to 0 (2-byte SP) or 1 (1-byte SP) +# option[] : Maps Core/Device to the mmcu= option to get it +# toCore[] : Maps Device to its Core +################################################################## + +/^AVR_MCU/ { + name = $2 + gsub ("\"", "", name) + + if ($4 == "NULL") + { + core = name + + # avr1 is supported for Assembler only: It gets no multilib + if (core == "avr1") + next + + cores[n_cores] = core + n_cores++ + tiny_stack[core] = 0 + option[core] = "mmcu=" core + + next + } + + # avr1 is supported for Assembler only: Its Devices are ignored + if (core == "avr1") + next + + tiny_stack[name] = $5 + mcu[n_mcu] = name + n_mcu++ + option[name] = "mmcu=" name + toCore[name] = core + + if (tiny_stack[name] == 1) + tiny_stack[core] = 1 +} + +################################################################## +# +# We gathered all the Information, now build/output the following: +# +# awk Variable target Variable FORMAT +# ----------------------------------------------------------- +# m_options <-> MULTILIB_OPTIONS Makefile +# m_dirnames <-> MULTILIB_DIRNAMES " +# m_exceptions <-> MULTILIB_EXCEPTIONS " +# m_matches <-> MULTILIB_MATCHES " +# +################################################################## + +END { + m_options = "\nMULTILIB_OPTIONS = " + m_dirnames = "\nMULTILIB_DIRNAMES =" + m_exceptions = "\nMULTILIB_EXCEPTIONS =" + m_matches = "\nMULTILIB_MATCHES =" + + ############################################################## + # Compose MULTILIB_OPTIONS. This represents the Cross-Product + # (avr2, avr25, ...) x msp8 + + sep = "" + for (c = 0; c < n_cores; c++) + { + m_options = m_options sep option[cores[c]] + sep = "/" + } + + # The ... 
x msp8 + m_options = m_options " " option[mtiny[1]] + + ############################################################## + # Map Device to its multilib + + for (t = 0; t < n_mcu; t++) + { + core = toCore[mcu[t]] + + line = option[core] ":" option[mcu[t]] + gsub ("=", "?", line) + gsub (":", "=", line) + + m_matches = m_matches " \\\n\t" line + } + + #################################################################### + # Compose MULTILIB_DIRNAMES and MULTILIB_EXEPTIONS + + n_mtiny = 2 + for (t = 0; t < n_mtiny; t++) + for (c = -1; c < n_cores; c++) + { + if (c == -1) + core = "" + else + core = cores[c] + + # The Directory Name for this multilib + + if (core != "" && mtiny[t] != "") + { + mdir = core "/" mtiny[t] + mopt = option[core] "/" option[mtiny[t]] + } + else + { + mdir = core mtiny[t] + mopt = option[core] option[mtiny[t]] + } + + if (core != "" && tiny_stack[core] == 0 && mtiny[t] != "") + { + # There's not a single SP = 8 Devices for this Core: + # Don't build respective multilib + m_exceptions = m_exceptions " \\\n\t" mopt + continue + } + + if (core != "avr2" || mtiny[t] == "") + m_dirnames = m_dirnames " " mdir + } + + ############################################################ + # Output that Stuff + ############################################################ + + if (FORMAT == "Makefile") + { + # Intended Target: ./gcc/config/avr/t-multilib + + print m_options + print m_dirnames + print m_exceptions + print m_matches + } +} diff --git a/gcc-4.9/gcc/config/avr/genopt.sh b/gcc-4.9/gcc/config/avr/genopt.sh new file mode 100755 index 000000000..9838ec25a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/genopt.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# Generate avr-tables.opt from the list in avr-mcus.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(avr_mcu) Type(int) +Known MCU names: + +EOF + +awk -F'[(, ]+' 'BEGIN { + value = 0 +} +/^AVR_MCU/ { + name = $2 + gsub("\"", "", name) + print "EnumValue" + print "Enum(avr_mcu) String(" name ") Value(" value ")" + print "" + value++ +}' $1 diff --git a/gcc-4.9/gcc/config/avr/predicates.md b/gcc-4.9/gcc/config/avr/predicates.md new file mode 100644 index 000000000..85612e14a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/predicates.md @@ -0,0 +1,275 @@ +;; Predicate definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Registers from r0 to r15. +(define_predicate "l_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= 15"))) + +;; Registers from r16 to r31. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) >= 16 && REGNO (op) <= 31"))) + +(define_predicate "even_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) == 0")))) + +(define_predicate "odd_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) != 0")))) + +;; SP register. +(define_predicate "stack_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_SP"))) + +;; Return true if OP is a valid address for lower half of I/O space. +(define_predicate "low_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0, 0x1f)"))) + +;; Return true if OP is a valid address for high half of I/O space. +(define_predicate "high_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0x20, 0x3F)"))) + +;; Return true if OP is a valid address of I/O space. +(define_predicate "io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0, 0x40 - GET_MODE_SIZE (mode))"))) + +;; Return 1 if OP is a general operand not in flash memory +(define_predicate "nop_general_operand" + (and (match_operand 0 "general_operand") + (match_test "!avr_mem_flash_p (op)"))) + +;; Return 1 if OP is an "ordinary" general operand, i.e. a general +;; operand whose load is not handled by a libgcc call or ELPM. +(define_predicate "nox_general_operand" + (and (match_operand 0 "general_operand") + (not (match_test "avr_load_libgcc_p (op)")) + (not (match_test "avr_mem_memx_p (op)")))) + +;; Return 1 if OP is a memory operand in one of the __flash* address spaces +(define_predicate "flash_operand" + (and (match_operand 0 "memory_operand") + (match_test "Pmode == mode") + (ior (match_test "!MEM_P (op)") + (match_test "avr_mem_flash_p (op)")))) + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_fixed,const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; Return 1 if OP is the one constant integer for MODE. +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == CONST1_RTX (mode)"))) + + +;; Return 1 if OP is constant integer 0..7 for MODE. +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +;; Return 1 if OP is constant integer 2..7 for MODE. +(define_predicate "const_2_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 7)"))) + +;; Return 1 if OP is constant integer 1..6 for MODE. +(define_predicate "const_1_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 6)"))) + +;; Return 1 if OP is constant integer 2..6 for MODE. +(define_predicate "const_2_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 6)"))) + +;; Returns true if OP is either the constant zero or a register. 
+(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; Return true if OP is a text segment reference. +;; This is needed for program memory address expressions. +(define_predicate "text_segment_operand" + (match_code "code_label,label_ref,symbol_ref,plus,const") +{ + switch (GET_CODE (op)) + { + case CODE_LABEL: + return true; + case LABEL_REF : + return true; + case SYMBOL_REF : + return SYMBOL_REF_FUNCTION_P (op); + case PLUS : + /* Assume canonical format of symbol + constant. + Fall through. */ + case CONST : + return text_segment_operand (XEXP (op, 0), VOIDmode); + default : + return false; + } +}) + +;; Return true if OP is a constant that contains only one 1 in its +;; binary representation. +(define_predicate "single_one_operand" + (and (match_code "const_int") + (match_test "exact_log2(INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; Return true if OP is a constant that contains only one 0 in its +;; binary representation. +(define_predicate "single_zero_operand" + (and (match_code "const_int") + (match_test "exact_log2(~INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; +(define_predicate "avr_sp_immediate_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_Csp (op)"))) + +;; True for EQ & NE +(define_predicate "eqne_operator" + (match_code "eq,ne")) + +;; True for GE & LT +(define_predicate "gelt_operator" + (match_code "ge,lt")) + +;; True for GT, GTU, LE & LEU +(define_predicate "difficult_comparison_operator" + (match_code "gt,gtu,le,leu")) + +;; False for GT, GTU, LE & LEU +(define_predicate "simple_comparison_operator" + (and (match_operand 0 "comparison_operator") + (not (match_code "gt,gtu,le,leu")))) + +;; Return true if OP is a valid call operand. +(define_predicate "call_insn_operand" + (and (match_code "mem") + (ior (match_test "register_operand (XEXP (op, 0), mode)") + (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))")))) + +;; For some insns we must ensure that no hard register is inserted +;; into their operands because the insns are split and the split +;; involves hard registers. An example are divmod insn that are +;; split to insns that represent implicit library calls. + +;; True for register that is pseudo register. +(define_predicate "pseudo_register_operand" + (and (match_operand 0 "register_operand") + (not (and (match_code "reg") + (match_test "HARD_REGISTER_P (op)"))))) + +;; True for operand that is pseudo register or CONST_INT. +(define_predicate "pseudo_register_or_const_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "pseudo_register_operand"))) + +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't combine to a widening +;; multiplication. There is no need for combine to propagate or insert +;; hard registers, register allocation can do it just as well. + +;; True for operand that is pseudo register at combine time. 
+(define_predicate "combine_pseudo_register_operand" + (ior (match_operand 0 "pseudo_register_operand") + (and (match_operand 0 "register_operand") + (match_test "reload_completed || reload_in_progress")))) + +;; Return true if OP is a constant integer that is either +;; 8 or 16 or 24. +(define_predicate "const_8_16_24_operand" + (and (match_code "const_int") + (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)"))) + +;; Unsigned CONST_INT that fits in 8 bits, i.e. 0..255. +(define_predicate "u8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +;; Signed CONST_INT that fits in 8 bits, i.e. -128..127. +(define_predicate "s8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -128, 127)"))) + +;; One-extended CONST_INT that fits in 8 bits, i.e. -256..-1. +(define_predicate "o8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, -1)"))) + +;; Signed CONST_INT that fits in 9 bits, i.e. -256..255. +(define_predicate "s9_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, 255)"))) + +(define_predicate "register_or_s9_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "s9_operand"))) + +;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65536. +(define_predicate "u16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)"))) + +;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767. +(define_predicate "s16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)"))) + +;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1. +(define_predicate "o16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)"))) + +;; Const int, fixed, or double operand +(define_predicate "const_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "const_int_operand"))) + +;; Const int, const fixed, or const double operand +(define_predicate "nonmemory_or_const_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "nonmemory_operand"))) + +;; Immediate, const fixed, or const double operand +(define_predicate "const_or_immediate_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "immediate_operand"))) diff --git a/gcc-4.9/gcc/config/avr/rtems.h b/gcc-4.9/gcc/config/avr/rtems.h new file mode 100644 index 000000000..473273b99 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/rtems.h @@ -0,0 +1,27 @@ +/* Definitions for rtems targeting a AVR using ELF. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by Ralf Corsepius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Specify predefined symbols in preprocessor. 
*/ + +#define TARGET_OS_CPP_BUILTINS() \ +do { \ + builtin_define ("__rtems__"); \ + builtin_assert ("system=rtems"); \ +} while (0) diff --git a/gcc-4.9/gcc/config/avr/stdfix.h b/gcc-4.9/gcc/config/avr/stdfix.h new file mode 100644 index 000000000..38d80e4dc --- /dev/null +++ b/gcc-4.9/gcc/config/avr/stdfix.h @@ -0,0 +1,236 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* ISO/IEC JTC1 SC22 WG14 N1169 + * Date: 2006-04-04 + * ISO/IEC TR 18037 + * Programming languages - C - Extensions to support embedded processors + */ + +#ifndef _AVRGCC_STDFIX_H +#define _AVRGCC_STDFIX_H + +/* 7.18a.1 Introduction. */ +/* 7.18a.3 Precision macros. */ + +#include + + +#if __SIZEOF_INT__ == 2 + +typedef signed char int_hr_t; +typedef unsigned char uint_uhr_t; + +typedef short int int_r_t; +typedef short unsigned int uint_ur_t; + +typedef short int int_hk_t; +typedef short unsigned int uint_uhk_t; + +typedef long int int_lr_t; +typedef long unsigned int uint_ulr_t; + +typedef long int int_k_t; +typedef long unsigned int uint_uk_t; + +typedef long long int int_llr_t; +typedef long long unsigned int uint_ullr_t; + +typedef long long int int_lk_t; +typedef long long unsigned int uint_ulk_t; + +typedef long long int int_llk_t; +typedef long long unsigned int uint_ullk_t; + +#elif __SIZEOF_INT__ == 1 /* -mint8 */ + +typedef signed char int_hr_t; +typedef unsigned char uint_uhr_t; + +typedef long int int_r_t; +typedef long unsigned int uint_ur_t; + +typedef long int int_hk_t; +typedef long unsigned int uint_uhk_t; + +typedef long long int int_lr_t; +typedef long long unsigned int uint_ulr_t; + +typedef long long int int_k_t; +typedef long long unsigned int uint_uk_t; + +#endif /* __SIZEOF_INT__ == 1, 2 */ + + +/* 7.18a.6 The fixed-point intrinsic functions. */ + + +/* 7.18a.6.2 The fixed-point absolute value functions. */ + +#define abshr __builtin_avr_abshr +#define absr __builtin_avr_absr +#define abslr __builtin_avr_abslr + +#define abshk __builtin_avr_abshk +#define absk __builtin_avr_absk + +#if __SIZEOF_INT__ == 2 + +#define abslk __builtin_avr_abslk +#define absllr __builtin_avr_absllr /* GCC Extension */ +#define absllk __builtin_avr_absllk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.3 The fixed-point round functions. */ + +/* The Embedded-C paper specifies results only for rounding points + + 0 < RP < FBIT + + As an extension, the following functions work as expected + with rounding points + + -IBIT < RP < FBIT + + For example, rounding an accum with a rounding point of -1 will + result in an even integer value. 
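+
+   A short illustrative use (added here for clarity, not in the original
+   header): after
+
+     accum x = roundk (y, 2);
+
+   x holds y rounded to two fractional bits, i.e. to a multiple of 0.25k,
+   and a rounding point of -2 would likewise round to a multiple of 4k.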
*/ + +#define roundhr __builtin_avr_roundhr +#define roundr __builtin_avr_roundr +#define roundlr __builtin_avr_roundlr + +#define rounduhr __builtin_avr_rounduhr +#define roundur __builtin_avr_roundur +#define roundulr __builtin_avr_roundulr + +#define roundhk __builtin_avr_roundhk +#define roundk __builtin_avr_roundk + +#define rounduhk __builtin_avr_rounduhk +#define rounduk __builtin_avr_rounduk + +#if __SIZEOF_INT__ == 2 + +#define roundlk __builtin_avr_roundlk +#define roundulk __builtin_avr_roundulk +#define roundllr __builtin_avr_roundllr /* GCC Extension */ +#define roundullr __builtin_avr_roundullr /* GCC Extension */ +#define roundllk __builtin_avr_roundllk /* GCC Extension */ +#define roundullk __builtin_avr_roundullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.4 The fixed-point bit countls functions. */ + +#define countlshr __builtin_avr_countlshr +#define countlsr __builtin_avr_countlsr +#define countlslr __builtin_avr_countlslr + +#define countlsuhr __builtin_avr_countlsuhr +#define countlsur __builtin_avr_countlsur +#define countlsulr __builtin_avr_countlsulr + +#define countlshk __builtin_avr_countlshk +#define countlsk __builtin_avr_countlsk + +#define countlsuhk __builtin_avr_countlsuhk +#define countlsuk __builtin_avr_countlsuk + +#if __SIZEOF_INT__ == 2 + +#define countlslk __builtin_avr_countlslk +#define countlsulk __builtin_avr_countlsulk +#define countlsllr __builtin_avr_countlsllr /* GCC Extension */ +#define countlsullr __builtin_avr_countlsullr /* GCC Extension */ +#define countlsllk __builtin_avr_countlsllk /* GCC Extension */ +#define countlsullk __builtin_avr_countlsullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.5 The bitwise fixed-point to integer conversion functions. */ + +#define bitshr __builtin_avr_bitshr +#define bitsr __builtin_avr_bitsr +#define bitslr __builtin_avr_bitslr + +#define bitsuhr __builtin_avr_bitsuhr +#define bitsur __builtin_avr_bitsur +#define bitsulr __builtin_avr_bitsulr + +#define bitshk __builtin_avr_bitshk +#define bitsk __builtin_avr_bitsk + +#define bitsuhk __builtin_avr_bitsuhk +#define bitsuk __builtin_avr_bitsuk + +#if __SIZEOF_INT__ == 2 + +#define bitslk __builtin_avr_bitslk +#define bitsulk __builtin_avr_bitsulk +#define bitsllr __builtin_avr_bitsllr /* GCC Extension */ +#define bitsullr __builtin_avr_bitsullr /* GCC Extension */ +#define bitsllk __builtin_avr_bitsllk /* GCC Extension */ +#define bitsullk __builtin_avr_bitsullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.6 The bitwise integer to fixed-point conversion functions. */ + +#define hrbits __builtin_avr_hrbits +#define rbits __builtin_avr_rbits +#define lrbits __builtin_avr_lrbits + +#define uhrbits __builtin_avr_uhrbits +#define urbits __builtin_avr_urbits +#define ulrbits __builtin_avr_ulrbits + +#define hkbits __builtin_avr_hkbits +#define kbits __builtin_avr_kbits + +#define uhkbits __builtin_avr_uhkbits +#define ukbits __builtin_avr_ukbits + +#if __SIZEOF_INT__ == 2 + +#define lkbits __builtin_avr_lkbits +#define ulkbits __builtin_avr_ulkbits +#define llrbits __builtin_avr_llrbits /* GCC Extension */ +#define ullrbits __builtin_avr_ullrbits /* GCC Extension */ +#define llkbits __builtin_avr_llkbits /* GCC Extension */ +#define ullkbits __builtin_avr_ullkbits /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.7 Type-generic fixed-point functions. 
*/ + +#define absfx __builtin_avr_absfx +#define roundfx __builtin_avr_roundfx +#define countlsfx __builtin_avr_countlsfx + +#endif /* _AVRGCC_STDFIX_H */ diff --git a/gcc-4.9/gcc/config/avr/t-avr b/gcc-4.9/gcc/config/avr/t-avr new file mode 100644 index 000000000..75120ef1e --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-avr @@ -0,0 +1,83 @@ +# Copyright (C) 2000-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +driver-avr.o: $(srcdir)/config/avr/driver-avr.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-devices.o: $(srcdir)/config/avr/avr-devices.c \ + $(srcdir)/config/avr/avr-mcus.def \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-c.o: $(srcdir)/config/avr/avr-c.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-log.o: $(srcdir)/config/avr/avr-log.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(INPUT_H) dumpfile.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr.o avr-c.o: $(srcdir)/config/avr/builtins.def + +# This overrides stdfix.h from USER_H which we supply and include +# in our own stdint.h as stdint-gcc.h. + +EXTRA_HEADERS = $(srcdir)/config/avr/stdfix.h \ + stdfix-gcc.h + +stdfix-gcc.h: $(srcdir)/ginclude/stdfix.h + -cp $< $@ + +# Files and Variables auto-generated from avr-mcus.def + +AVR_MCUS = $(srcdir)/config/avr/avr-mcus.def + +# Run `avr-mcus' after you changed or added devices in avr-mcus.def + +.PHONY: avr-mcus + +avr-mcus: $(srcdir)/config/avr/t-multilib \ + $(srcdir)/config/avr/avr-tables.opt \ + $(srcdir)/doc/avr-mmcu.texi ; @true + +# Make sure that -mmcu= is supported for devices from avr-mcus.def and +# all -mmcu= values are displayed on the help screen +$(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh $(AVR_MCUS) + $(SHELL) $< $(AVR_MCUS) > $@ + +# Make sure that -mmcu= support is in sync with -mmcu= documentation. 
+gen-avr-mmcu-texi$(build_exeext): $(srcdir)/config/avr/gen-avr-mmcu-texi.c \ + $(AVR_MCUS) $(srcdir)/config/avr/avr-devices.c \ + $(srcdir)/config/avr/avr-arch.h + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $< -o $@ + +$(srcdir)/doc/avr-mmcu.texi: gen-avr-mmcu-texi$(build_exeext) + $(RUN_GEN) ./$< > $@ + +# Map -mmcu= to the right multilib variant +# MULTILIB_OPTIONS +# MULTILIB_DIRNAMES +# MULTILIB_EXCEPTIONS +# MULTILIB_MATCHES + +s-mlib: $(srcdir)/config/avr/t-multilib + +$(srcdir)/config/avr/t-multilib: $(srcdir)/config/avr/genmultilib.awk \ + $(AVR_MCUS) + $(AWK) -f $< -v FORMAT=Makefile $< $(AVR_MCUS) > $@ diff --git a/gcc-4.9/gcc/config/avr/t-multilib b/gcc-4.9/gcc/config/avr/t-multilib new file mode 100644 index 000000000..301f86496 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-multilib @@ -0,0 +1,269 @@ +# Auto-generated Makefile Snip +# Generated by : ./gcc/config/avr/genmultilib.awk +# Generated from : ./gcc/config/avr/avr-mcus.def +# Used by : tmake_file from Makefile and genmultilib + +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mmcu=avr2/mmcu=avr25/mmcu=avr3/mmcu=avr31/mmcu=avr35/mmcu=avr4/mmcu=avr5/mmcu=avr51/mmcu=avr6/mmcu=avrxmega2/mmcu=avrxmega4/mmcu=avrxmega5/mmcu=avrxmega6/mmcu=avrxmega7 msp8 + +MULTILIB_DIRNAMES = avr2 avr25 avr3 avr31 avr35 avr4 avr5 avr51 avr6 avrxmega2 avrxmega4 avrxmega5 avrxmega6 avrxmega7 tiny-stack avr25/tiny-stack + +MULTILIB_EXCEPTIONS = \ + mmcu=avr3/msp8 \ + mmcu=avr31/msp8 \ + mmcu=avr35/msp8 \ + mmcu=avr4/msp8 \ + mmcu=avr5/msp8 \ + mmcu=avr51/msp8 \ + mmcu=avr6/msp8 \ + mmcu=avrxmega2/msp8 \ + mmcu=avrxmega4/msp8 \ + mmcu=avrxmega5/msp8 \ + mmcu=avrxmega6/msp8 \ + mmcu=avrxmega7/msp8 + +MULTILIB_MATCHES = \ + mmcu?avr2=mmcu?at90s2313 \ + mmcu?avr2=mmcu?at90s2323 \ + mmcu?avr2=mmcu?at90s2333 \ + mmcu?avr2=mmcu?at90s2343 \ + mmcu?avr2=mmcu?attiny22 \ + mmcu?avr2=mmcu?attiny26 \ + mmcu?avr2=mmcu?at90s4414 \ + mmcu?avr2=mmcu?at90s4433 \ + mmcu?avr2=mmcu?at90s4434 \ + mmcu?avr2=mmcu?at90s8515 \ + mmcu?avr2=mmcu?at90c8534 \ + mmcu?avr2=mmcu?at90s8535 \ + mmcu?avr25=mmcu?ata6289 \ + mmcu?avr25=mmcu?ata5272 \ + mmcu?avr25=mmcu?attiny13 \ + mmcu?avr25=mmcu?attiny13a \ + mmcu?avr25=mmcu?attiny2313 \ + mmcu?avr25=mmcu?attiny2313a \ + mmcu?avr25=mmcu?attiny24 \ + mmcu?avr25=mmcu?attiny24a \ + mmcu?avr25=mmcu?attiny4313 \ + mmcu?avr25=mmcu?attiny44 \ + mmcu?avr25=mmcu?attiny44a \ + mmcu?avr25=mmcu?attiny84 \ + mmcu?avr25=mmcu?attiny84a \ + mmcu?avr25=mmcu?attiny25 \ + mmcu?avr25=mmcu?attiny45 \ + mmcu?avr25=mmcu?attiny85 \ + mmcu?avr25=mmcu?attiny261 \ + mmcu?avr25=mmcu?attiny261a \ + mmcu?avr25=mmcu?attiny461 \ + mmcu?avr25=mmcu?attiny461a \ + mmcu?avr25=mmcu?attiny861 \ + mmcu?avr25=mmcu?attiny861a \ + mmcu?avr25=mmcu?attiny43u \ + mmcu?avr25=mmcu?attiny87 \ + mmcu?avr25=mmcu?attiny48 \ + mmcu?avr25=mmcu?attiny88 \ + mmcu?avr25=mmcu?at86rf401 \ + mmcu?avr3=mmcu?at43usb355 \ + mmcu?avr3=mmcu?at76c711 \ 
+ mmcu?avr31=mmcu?atmega103 \ + mmcu?avr31=mmcu?at43usb320 \ + mmcu?avr35=mmcu?ata5505 \ + mmcu?avr35=mmcu?at90usb82 \ + mmcu?avr35=mmcu?at90usb162 \ + mmcu?avr35=mmcu?atmega8u2 \ + mmcu?avr35=mmcu?atmega16u2 \ + mmcu?avr35=mmcu?atmega32u2 \ + mmcu?avr35=mmcu?attiny167 \ + mmcu?avr35=mmcu?attiny1634 \ + mmcu?avr4=mmcu?ata6285 \ + mmcu?avr4=mmcu?ata6286 \ + mmcu?avr4=mmcu?atmega8 \ + mmcu?avr4=mmcu?atmega8a \ + mmcu?avr4=mmcu?atmega48 \ + mmcu?avr4=mmcu?atmega48a \ + mmcu?avr4=mmcu?atmega48p \ + mmcu?avr4=mmcu?atmega48pa \ + mmcu?avr4=mmcu?atmega88 \ + mmcu?avr4=mmcu?atmega88a \ + mmcu?avr4=mmcu?atmega88p \ + mmcu?avr4=mmcu?atmega88pa \ + mmcu?avr4=mmcu?atmega8515 \ + mmcu?avr4=mmcu?atmega8535 \ + mmcu?avr4=mmcu?atmega8hva \ + mmcu?avr4=mmcu?at90pwm1 \ + mmcu?avr4=mmcu?at90pwm2 \ + mmcu?avr4=mmcu?at90pwm2b \ + mmcu?avr4=mmcu?at90pwm3 \ + mmcu?avr4=mmcu?at90pwm3b \ + mmcu?avr4=mmcu?at90pwm81 \ + mmcu?avr5=mmcu?ata5790 \ + mmcu?avr5=mmcu?ata5790n \ + mmcu?avr5=mmcu?ata5795 \ + mmcu?avr5=mmcu?atmega16 \ + mmcu?avr5=mmcu?atmega16a \ + mmcu?avr5=mmcu?atmega161 \ + mmcu?avr5=mmcu?atmega162 \ + mmcu?avr5=mmcu?atmega163 \ + mmcu?avr5=mmcu?atmega164a \ + mmcu?avr5=mmcu?atmega164p \ + mmcu?avr5=mmcu?atmega164pa \ + mmcu?avr5=mmcu?atmega165 \ + mmcu?avr5=mmcu?atmega165a \ + mmcu?avr5=mmcu?atmega165p \ + mmcu?avr5=mmcu?atmega165pa \ + mmcu?avr5=mmcu?atmega168 \ + mmcu?avr5=mmcu?atmega168a \ + mmcu?avr5=mmcu?atmega168p \ + mmcu?avr5=mmcu?atmega168pa \ + mmcu?avr5=mmcu?atmega169 \ + mmcu?avr5=mmcu?atmega169a \ + mmcu?avr5=mmcu?atmega169p \ + mmcu?avr5=mmcu?atmega169pa \ + mmcu?avr5=mmcu?atmega16hvb \ + mmcu?avr5=mmcu?atmega16hvbrevb \ + mmcu?avr5=mmcu?atmega16m1 \ + mmcu?avr5=mmcu?atmega16u4 \ + mmcu?avr5=mmcu?atmega26hvg \ + mmcu?avr5=mmcu?atmega32a \ + mmcu?avr5=mmcu?atmega32 \ + mmcu?avr5=mmcu?atmega323 \ + mmcu?avr5=mmcu?atmega324a \ + mmcu?avr5=mmcu?atmega324p \ + mmcu?avr5=mmcu?atmega324pa \ + mmcu?avr5=mmcu?atmega325 \ + mmcu?avr5=mmcu?atmega325a \ + mmcu?avr5=mmcu?atmega325p \ + mmcu?avr5=mmcu?atmega3250 \ + mmcu?avr5=mmcu?atmega3250a \ + mmcu?avr5=mmcu?atmega3250p \ + mmcu?avr5=mmcu?atmega3250pa \ + mmcu?avr5=mmcu?atmega328 \ + mmcu?avr5=mmcu?atmega328p \ + mmcu?avr5=mmcu?atmega329 \ + mmcu?avr5=mmcu?atmega329a \ + mmcu?avr5=mmcu?atmega329p \ + mmcu?avr5=mmcu?atmega329pa \ + mmcu?avr5=mmcu?atmega3290 \ + mmcu?avr5=mmcu?atmega3290a \ + mmcu?avr5=mmcu?atmega3290p \ + mmcu?avr5=mmcu?atmega3290pa \ + mmcu?avr5=mmcu?atmega32c1 \ + mmcu?avr5=mmcu?atmega32m1 \ + mmcu?avr5=mmcu?atmega32u4 \ + mmcu?avr5=mmcu?atmega32u6 \ + mmcu?avr5=mmcu?atmega406 \ + mmcu?avr5=mmcu?atmega64 \ + mmcu?avr5=mmcu?atmega64a \ + mmcu?avr5=mmcu?atmega640 \ + mmcu?avr5=mmcu?atmega644 \ + mmcu?avr5=mmcu?atmega644a \ + mmcu?avr5=mmcu?atmega644p \ + mmcu?avr5=mmcu?atmega644pa \ + mmcu?avr5=mmcu?atmega645 \ + mmcu?avr5=mmcu?atmega645a \ + mmcu?avr5=mmcu?atmega645p \ + mmcu?avr5=mmcu?atmega6450 \ + mmcu?avr5=mmcu?atmega6450a \ + mmcu?avr5=mmcu?atmega6450p \ + mmcu?avr5=mmcu?atmega649 \ + mmcu?avr5=mmcu?atmega649a \ + mmcu?avr5=mmcu?atmega649p \ + mmcu?avr5=mmcu?atmega6490 \ + mmcu?avr5=mmcu?atmega16hva \ + mmcu?avr5=mmcu?atmega16hva2 \ + mmcu?avr5=mmcu?atmega32hvb \ + mmcu?avr5=mmcu?atmega6490a \ + mmcu?avr5=mmcu?atmega6490p \ + mmcu?avr5=mmcu?atmega64c1 \ + mmcu?avr5=mmcu?atmega64m1 \ + mmcu?avr5=mmcu?atmega64hve \ + mmcu?avr5=mmcu?atmega64rfa2 \ + mmcu?avr5=mmcu?atmega64rfr2 \ + mmcu?avr5=mmcu?atmega32hvbrevb \ + mmcu?avr5=mmcu?atmega48hvf \ + mmcu?avr5=mmcu?at90can32 \ + mmcu?avr5=mmcu?at90can64 \ + mmcu?avr5=mmcu?at90pwm161 \ + 
mmcu?avr5=mmcu?at90pwm216 \ + mmcu?avr5=mmcu?at90pwm316 \ + mmcu?avr5=mmcu?at90scr100 \ + mmcu?avr5=mmcu?at90usb646 \ + mmcu?avr5=mmcu?at90usb647 \ + mmcu?avr5=mmcu?at94k \ + mmcu?avr5=mmcu?m3000 \ + mmcu?avr51=mmcu?atmega128 \ + mmcu?avr51=mmcu?atmega128a \ + mmcu?avr51=mmcu?atmega1280 \ + mmcu?avr51=mmcu?atmega1281 \ + mmcu?avr51=mmcu?atmega1284 \ + mmcu?avr51=mmcu?atmega1284p \ + mmcu?avr51=mmcu?atmega128rfa1 \ + mmcu?avr51=mmcu?at90can128 \ + mmcu?avr51=mmcu?at90usb1286 \ + mmcu?avr51=mmcu?at90usb1287 \ + mmcu?avr6=mmcu?atmega2560 \ + mmcu?avr6=mmcu?atmega2561 \ + mmcu?avrxmega2=mmcu?atxmega16a4 \ + mmcu?avrxmega2=mmcu?atxmega16d4 \ + mmcu?avrxmega2=mmcu?atxmega32a4 \ + mmcu?avrxmega2=mmcu?atxmega32d4 \ + mmcu?avrxmega2=mmcu?atxmega32x1 \ + mmcu?avrxmega2=mmcu?atmxt112sl \ + mmcu?avrxmega2=mmcu?atmxt224 \ + mmcu?avrxmega2=mmcu?atmxt224e \ + mmcu?avrxmega2=mmcu?atmxt336s \ + mmcu?avrxmega2=mmcu?atxmega16a4u \ + mmcu?avrxmega2=mmcu?atxmega16c4 \ + mmcu?avrxmega2=mmcu?atxmega32a4u \ + mmcu?avrxmega2=mmcu?atxmega32c4 \ + mmcu?avrxmega2=mmcu?atxmega32e5 \ + mmcu?avrxmega4=mmcu?atxmega64a3 \ + mmcu?avrxmega4=mmcu?atxmega64d3 \ + mmcu?avrxmega4=mmcu?atxmega64a3u \ + mmcu?avrxmega4=mmcu?atxmega64a4u \ + mmcu?avrxmega4=mmcu?atxmega64b1 \ + mmcu?avrxmega4=mmcu?atxmega64b3 \ + mmcu?avrxmega4=mmcu?atxmega64c3 \ + mmcu?avrxmega4=mmcu?atxmega64d4 \ + mmcu?avrxmega5=mmcu?atxmega64a1 \ + mmcu?avrxmega5=mmcu?atxmega64a1u \ + mmcu?avrxmega6=mmcu?atxmega128a3 \ + mmcu?avrxmega6=mmcu?atxmega128d3 \ + mmcu?avrxmega6=mmcu?atxmega192a3 \ + mmcu?avrxmega6=mmcu?atxmega192d3 \ + mmcu?avrxmega6=mmcu?atxmega256a3 \ + mmcu?avrxmega6=mmcu?atxmega256a3b \ + mmcu?avrxmega6=mmcu?atxmega256a3bu \ + mmcu?avrxmega6=mmcu?atxmega256d3 \ + mmcu?avrxmega6=mmcu?atxmega128a3u \ + mmcu?avrxmega6=mmcu?atxmega128b1 \ + mmcu?avrxmega6=mmcu?atxmega128b3 \ + mmcu?avrxmega6=mmcu?atxmega128c3 \ + mmcu?avrxmega6=mmcu?atxmega128d4 \ + mmcu?avrxmega6=mmcu?atmxt540s \ + mmcu?avrxmega6=mmcu?atmxt540sreva \ + mmcu?avrxmega6=mmcu?atxmega192a3u \ + mmcu?avrxmega6=mmcu?atxmega192c3 \ + mmcu?avrxmega6=mmcu?atxmega256a3u \ + mmcu?avrxmega6=mmcu?atxmega256c3 \ + mmcu?avrxmega6=mmcu?atxmega384c3 \ + mmcu?avrxmega6=mmcu?atxmega384d3 \ + mmcu?avrxmega7=mmcu?atxmega128a1 \ + mmcu?avrxmega7=mmcu?atxmega128a1u \ + mmcu?avrxmega7=mmcu?atxmega128a4u diff --git a/gcc-4.9/gcc/config/avr/t-rtems b/gcc-4.9/gcc/config/avr/t-rtems new file mode 100644 index 000000000..a3ef8bd80 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-rtems @@ -0,0 +1,3 @@ +# Multilibs for avr RTEMS targets. + +# ATM, this is just a stub diff --git a/gcc-4.9/gcc/config/bfin/bfin-modes.def b/gcc-4.9/gcc/config/bfin/bfin-modes.def new file mode 100644 index 000000000..9006b169f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-modes.def @@ -0,0 +1,28 @@ +/* Definitions of target machine for GNU compiler, for Blackfin. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
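(The MULTILIB_MATCHES lines above fold every concrete -mmcu=<device> onto the -mmcu=<core> multilib it is built against, so for example atmega328p and atmega32u4 both pick the avr5 library set. A toy host-side model of that mapping, with an invented table, an invented multilib_for helper, and only a handful of the devices listed above:

#include <stdio.h>
#include <string.h>

static const struct { const char *device, *core; } matches[] = {
  { "atmega103",  "avr31" },
  { "atmega8",    "avr4"  },
  { "atmega328p", "avr5"  },
  { "atmega2560", "avr6"  },
};

static const char *multilib_for (const char *device)
{
  for (size_t i = 0; i < sizeof matches / sizeof matches[0]; i++)
    if (strcmp (device, matches[i].device) == 0)
      return matches[i].core;
  return "avr2";                     /* assumed fallback library set */
}

int main (void)
{
  printf ("%s\n", multilib_for ("atmega328p"));   /* avr5 */
  return 0;
}
)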
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* PDImode for the 40-bit accumulators. */ +PARTIAL_INT_MODE (DI, 40, PDI); + +/* Two of those - covering both accumulators for vector multiplications. */ +VECTOR_MODE (INT, PDI, 2); + +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI - occasionally used. */ diff --git a/gcc-4.9/gcc/config/bfin/bfin-opts.h b/gcc-4.9/gcc/config/bfin/bfin-opts.h new file mode 100644 index 000000000..b7cb39e8f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-opts.h @@ -0,0 +1,59 @@ +/* Definitions for the Blackfin port needed for option handling. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef BFIN_OPTS_H +#define BFIN_OPTS_H + +/* CPU type. */ +typedef enum bfin_cpu_type +{ + BFIN_CPU_UNKNOWN, + BFIN_CPU_BF512, + BFIN_CPU_BF514, + BFIN_CPU_BF516, + BFIN_CPU_BF518, + BFIN_CPU_BF522, + BFIN_CPU_BF523, + BFIN_CPU_BF524, + BFIN_CPU_BF525, + BFIN_CPU_BF526, + BFIN_CPU_BF527, + BFIN_CPU_BF531, + BFIN_CPU_BF532, + BFIN_CPU_BF533, + BFIN_CPU_BF534, + BFIN_CPU_BF536, + BFIN_CPU_BF537, + BFIN_CPU_BF538, + BFIN_CPU_BF539, + BFIN_CPU_BF542, + BFIN_CPU_BF542M, + BFIN_CPU_BF544, + BFIN_CPU_BF544M, + BFIN_CPU_BF547, + BFIN_CPU_BF547M, + BFIN_CPU_BF548, + BFIN_CPU_BF548M, + BFIN_CPU_BF549, + BFIN_CPU_BF549M, + BFIN_CPU_BF561, + BFIN_CPU_BF592 +} bfin_cpu_t; + +#endif diff --git a/gcc-4.9/gcc/config/bfin/bfin-protos.h b/gcc-4.9/gcc/config/bfin/bfin-protos.h new file mode 100644 index 000000000..be26ad105 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-protos.h @@ -0,0 +1,117 @@ +/* Prototypes for Blackfin functions used in the md file & elsewhere. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Function prototypes that cannot exist in bfin.h due to dependency + complications. 
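(The bfin_cpu_t enumeration above exists so option handling can work with a symbolic processor identity rather than a string. A minimal, hypothetical sketch of such a mapping — the cpu_table array and parse_bfin_cpu helper are invented for illustration and only a few parts are shown:

#include <stdio.h>
#include <string.h>

typedef enum { BFIN_CPU_UNKNOWN, BFIN_CPU_BF532, BFIN_CPU_BF537, BFIN_CPU_BF561 } bfin_cpu_t;

static const struct { const char *name; bfin_cpu_t cpu; } cpu_table[] = {
  { "bf532", BFIN_CPU_BF532 },
  { "bf537", BFIN_CPU_BF537 },
  { "bf561", BFIN_CPU_BF561 },
};

static bfin_cpu_t parse_bfin_cpu (const char *arg)
{
  for (size_t i = 0; i < sizeof cpu_table / sizeof cpu_table[0]; i++)
    if (strcmp (arg, cpu_table[i].name) == 0)
      return cpu_table[i].cpu;
  return BFIN_CPU_UNKNOWN;           /* unrecognized -mcpu= value */
}

int main (void)
{
  printf ("%d\n", (int) parse_bfin_cpu ("bf537"));   /* prints the BF537 enumerator */
  return 0;
}
)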
*/ +#ifndef GCC_BFIN_PROTOS_H +#define GCC_BFIN_PROTOS_H + +/* For the anomaly 05-00-0245 */ +#define WA_SPECULATIVE_LOADS 0x00000001 +#define ENABLE_WA_SPECULATIVE_LOADS \ + (bfin_workarounds & WA_SPECULATIVE_LOADS) + +/* For the anomaly 05-00-0244 */ +#define WA_SPECULATIVE_SYNCS 0x00000002 +#define ENABLE_WA_SPECULATIVE_SYNCS \ + (bfin_workarounds & WA_SPECULATIVE_SYNCS) + +/* For the anomaly 05-00-0371 */ +#define WA_RETS 0x00000004 +#define ENABLE_WA_RETS \ + (bfin_workarounds & WA_RETS) + +/* For the anomaly 05-00-0426 */ +#define WA_INDIRECT_CALLS 0x00000008 +#define ENABLE_WA_INDIRECT_CALLS \ + ((bfin_workarounds & WA_INDIRECT_CALLS) && !TARGET_ICPLB) + +#define WA_05000257 0x00000010 +#define ENABLE_WA_05000257 \ + (bfin_workarounds & WA_05000257) + +#define WA_05000283 0x00000020 +#define ENABLE_WA_05000283 \ + (bfin_workarounds & WA_05000283) + +#define WA_05000315 0x00000040 +#define ENABLE_WA_05000315 \ + (bfin_workarounds & WA_05000315) + +/* For the anomaly 05-00-0312 */ +#define WA_LOAD_LCREGS 0x00000080 +#define ENABLE_WA_LOAD_LCREGS \ + (bfin_workarounds & WA_LOAD_LCREGS) + +#define WA_05000074 0x00000100 +#define ENABLE_WA_05000074 \ + (bfin_workarounds & WA_05000074) + +extern bool function_arg_regno_p (int); + +extern const char *output_load_immediate (rtx *); +extern const char *output_casesi_internal (rtx *); +extern char *bfin_asm_long (void); +extern char *bfin_asm_short (void); +extern int log2constp (unsigned HOST_WIDE_INT); + +extern int hard_regno_mode_ok (int, enum machine_mode); +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); +extern HOST_WIDE_INT bfin_initial_elimination_offset (int, int); + +extern int effective_address_32bit_p (rtx, enum machine_mode); +extern int symbolic_reference_mentioned_p (rtx); +extern rtx bfin_gen_compare (rtx, enum machine_mode); +extern bool expand_move (rtx *, enum machine_mode); +extern void bfin_expand_call (rtx, rtx, rtx, rtx, int); +extern bool bfin_longcall_p (rtx, int); +extern bool bfin_dsp_memref_p (rtx); +extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx); + +extern enum reg_class secondary_input_reload_class (enum reg_class, + enum machine_mode, + rtx); +extern enum reg_class secondary_output_reload_class (enum reg_class, + enum machine_mode, + rtx); +extern char *section_asm_op_1 (SECT_ENUM_T); +extern char *section_asm_op (SECT_ENUM_T); +extern void print_operand (FILE *, rtx, char); +extern void print_address_operand (FILE *, rtx); +extern void split_di (rtx [], int, rtx [], rtx []); +extern int split_load_immediate (rtx []); +extern void emit_pic_move (rtx *, enum machine_mode); +extern void asm_conditional_branch (rtx, rtx *, int, int); +extern rtx bfin_gen_compare (rtx, enum machine_mode); + +extern unsigned bfin_local_alignment (tree, unsigned); +extern rtx bfin_va_arg (tree, tree); + +extern void bfin_expand_prologue (void); +extern void bfin_expand_epilogue (int, int, bool); +extern int analyze_push_multiple_operation (rtx); +extern int analyze_pop_multiple_operation (rtx); +extern void output_push_multiple (rtx, rtx *); +extern void output_pop_multiple (rtx, rtx *); +extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx bfin_return_addr_rtx (int); +extern void bfin_hardware_loop (void); + +#endif + diff --git a/gcc-4.9/gcc/config/bfin/bfin.c b/gcc-4.9/gcc/config/bfin/bfin.c new file mode 100644 index 000000000..8b2821189 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.c @@ -0,0 +1,5834 @@ +/* The Blackfin code generation auxiliary output file. 
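(The WA_*/ENABLE_WA_* pairs above implement a simple bitmask scheme: each documented silicon anomaly gets one bit in bfin_workarounds, and code that must emit a workaround tests its bit, sometimes combined with another condition as ENABLE_WA_INDIRECT_CALLS does with TARGET_ICPLB. A standalone sketch of the idiom, with the bitmask initialized by hand instead of by option processing:

#include <stdio.h>

#define WA_SPECULATIVE_LOADS 0x00000001
#define WA_RETS              0x00000004

static unsigned bfin_workarounds;    /* in the compiler this comes from -mcpu/-msi-revision handling */

int main (void)
{
  bfin_workarounds = WA_SPECULATIVE_LOADS | WA_RETS;

  if (bfin_workarounds & WA_RETS)
    puts ("emit the 05-00-0371 (RETS) workaround sequence");
  if (!(bfin_workarounds & WA_SPECULATIVE_LOADS))
    puts ("speculative-load workaround not needed for this part");
  return 0;
}
)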
+ Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "tree.h" +#include "varasm.h" +#include "calls.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "input.h" +#include "target.h" +#include "target-def.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "optabs.h" +#include "ggc.h" +#include "cgraph.h" +#include "langhooks.h" +#include "bfin-protos.h" +#include "tm_p.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "gt-bfin.h" +#include "basic-block.h" +#include "timevar.h" +#include "df.h" +#include "sel-sched.h" +#include "hw-doloop.h" +#include "opts.h" +#include "dumpfile.h" + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* Set if we are notified by the doloop pass that a hardware loop + was created. */ + int has_hardware_loops; + + /* Set if we create a memcpy pattern that uses loop registers. */ + int has_loopreg_clobber; +}; + +/* RTX for condition code flag register and RETS register */ +extern GTY(()) rtx bfin_cc_rtx; +extern GTY(()) rtx bfin_rets_rtx; +rtx bfin_cc_rtx, bfin_rets_rtx; + +int max_arg_registers = 0; + +/* Arrays used when emitting register names. */ +const char *short_reg_names[] = SHORT_REGISTER_NAMES; +const char *high_reg_names[] = HIGH_REGISTER_NAMES; +const char *dregs_pair_names[] = DREGS_PAIR_NAMES; +const char *byte_reg_names[] = BYTE_REGISTER_NAMES; + +static int arg_regs[] = FUNCTION_ARG_REGISTERS; +static int ret_regs[] = FUNCTION_RETURN_REGISTERS; + +int splitting_for_sched, splitting_loops; + +static void +bfin_globalize_label (FILE *stream, const char *name) +{ + fputs (".global ", stream); + assemble_name (stream, name); + fputc (';',stream); + fputc ('\n',stream); +} + +static void +output_file_start (void) +{ + FILE *file = asm_out_file; + int i; + + fprintf (file, ".file \"%s\";\n", LOCATION_FILE (input_location)); + + for (i = 0; arg_regs[i] >= 0; i++) + ; + max_arg_registers = i; /* how many arg reg used */ +} + +/* Examine machine-dependent attributes of function type FUNTYPE and return its + type. See the definition of E_FUNKIND. */ + +static e_funkind +funkind (const_tree funtype) +{ + tree attrs = TYPE_ATTRIBUTES (funtype); + if (lookup_attribute ("interrupt_handler", attrs)) + return INTERRUPT_HANDLER; + else if (lookup_attribute ("exception_handler", attrs)) + return EXCPT_HANDLER; + else if (lookup_attribute ("nmi_handler", attrs)) + return NMI_HANDLER; + else + return SUBROUTINE; +} + +/* Legitimize PIC addresses. 
If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. This is REG if nonzero, otherwise we allocate register(s) as + necessary. PICREG is the register holding the pointer to the PIC offset + table. */ + +static rtx +legitimize_pic_address (rtx orig, rtx reg, rtx picreg) +{ + rtx addr = orig; + rtx new_rtx = orig; + + if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) + { + int unspec; + rtx tmp; + + if (TARGET_ID_SHARED_LIBRARY) + unspec = UNSPEC_MOVE_PIC; + else if (GET_CODE (addr) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (addr)) + unspec = UNSPEC_FUNCDESC_GOT17M4; + else + unspec = UNSPEC_MOVE_FDPIC; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); + if (picreg == pic_offset_table_rtx) + crtl->uses_pic_offset_table = 1; + return reg; + } + + else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS) + { + rtx base; + + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (XEXP (addr, 0) == picreg) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + base = legitimize_pic_address (XEXP (addr, 0), reg, picreg); + addr = legitimize_pic_address (XEXP (addr, 1), + base == reg ? NULL_RTX : reg, + picreg); + + if (GET_CODE (addr) == CONST_INT) + { + gcc_assert (! reload_in_progress && ! reload_completed); + addr = force_reg (Pmode, addr); + } + + if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0)); + addr = XEXP (addr, 1); + } + + return gen_rtx_PLUS (Pmode, base, addr); + } + + return new_rtx; +} + +/* Stack frame layout. */ + +/* For a given REGNO, determine whether it must be saved in the function + prologue. IS_INTHANDLER specifies whether we're generating a normal + prologue or an interrupt/exception one. */ +static bool +must_save_p (bool is_inthandler, unsigned regno) +{ + if (D_REGNO_P (regno)) + { + bool is_eh_return_reg = false; + if (crtl->calls_eh_return) + { + unsigned j; + for (j = 0; ; j++) + { + unsigned test = EH_RETURN_DATA_REGNO (j); + if (test == INVALID_REGNUM) + break; + if (test == regno) + is_eh_return_reg = true; + } + } + + return (is_eh_return_reg + || (df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno]))); + } + else if (P_REGNO_P (regno)) + { + return ((df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno])) + || (is_inthandler + && (ENABLE_WA_05000283 || ENABLE_WA_05000315) + && regno == REG_P5) + || (!TARGET_FDPIC + && regno == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY && !crtl->is_leaf)))); + } + else + return ((is_inthandler || !call_used_regs[regno]) + && (df_regs_ever_live_p (regno) + || (!leaf_function_p () && call_used_regs[regno]))); + +} + +/* Compute the number of DREGS to save with a push_multiple operation. + This could include registers that aren't modified in the function, + since push_multiple only takes a range of registers. + If IS_INTHANDLER, then everything that is live must be saved, even + if normally call-clobbered. 
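(The register-counting helpers that follow work in two modes: a total count, and a "consecutive" count that stops at the first gap, because a push/pop multiple instruction can only cover a contiguous range ending at R7 or P5. An illustrative host-side model of that loop, with a plain bool array standing in for must_save_p:

#include <stdio.h>
#include <stdbool.h>

static int count_dregs (const bool must_save[8], bool consecutive)
{
  int count = 0;
  for (int i = 7; i >= 0; i--)       /* walk from R7 down to R0 */
    {
      if (must_save[i])
        count++;
      else if (consecutive)
        return count;                /* gap found: the contiguous range ends here */
    }
  return count;
}

int main (void)
{
  /* R7, R6 and R4 need saving; R5 does not.  */
  bool live[8] = { false, false, false, false, true, false, true, true };
  printf ("total=%d consecutive=%d\n",
          count_dregs (live, false),    /* 3 */
          count_dregs (live, true));    /* 2: only R7,R6 fit one push-multiple */
  return 0;
}
)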
+ If CONSECUTIVE, return the number of registers we can save in one + instruction with a push/pop multiple instruction. */ + +static int +n_dregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_R7 + 1; i-- != REG_R0;) + { + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + } + return count; +} + +/* Like n_dregs_to_save, but compute number of PREGS to save. */ + +static int +n_pregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_P5 + 1; i-- != REG_P0;) + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + return count; +} + +/* Determine if we are going to save the frame pointer in the prologue. */ + +static bool +must_save_fp_p (void) +{ + return df_regs_ever_live_p (REG_FP); +} + +/* Determine if we are going to save the RETS register. */ +static bool +must_save_rets_p (void) +{ + return df_regs_ever_live_p (REG_RETS); +} + +static bool +stack_frame_needed_p (void) +{ + /* EH return puts a new return address into the frame using an + address relative to the frame pointer. */ + if (crtl->calls_eh_return) + return true; + return frame_pointer_needed; +} + +/* Emit code to save registers in the prologue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler) +{ + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int dregno, pregno; + int total_consec = ndregs_consec + npregs_consec; + int i, d_to_save; + + if (saveall || is_inthandler) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT)); + + RTX_FRAME_RELATED_P (insn) = 1; + for (dregno = REG_LT0; dregno <= REG_LB1; dregno++) + if (! 
crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 + && (dregno == REG_LC0 || dregno == REG_LC1))) + { + insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (total_consec != 0) + { + rtx insn; + rtx val = GEN_INT (-total_consec * 4); + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2)); + + XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val), + UNSPEC_PUSH_MULTIPLE); + XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, + spreg, + val)); + RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1; + d_to_save = ndregs_consec; + dregno = REG_R7 + 1 - ndregs_consec; + pregno = REG_P5 + 1 - npregs_consec; + for (i = 0; i < total_consec; i++) + { + rtx memref = gen_rtx_MEM (word_mode, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (- i * 4 - 4))); + rtx subpat; + if (d_to_save > 0) + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + dregno++)); + d_to_save--; + } + else + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + pregno++)); + } + XVECEXP (pat, 0, i + 1) = subpat; + RTX_FRAME_RELATED_P (subpat) = 1; + } + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + + for (dregno = REG_R0; ndregs != ndregs_consec; dregno++) + { + if (must_save_p (is_inthandler, dregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + ndregs--; + } + } + for (pregno = REG_P0; npregs != npregs_consec; pregno++) + { + if (must_save_p (is_inthandler, pregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno)); + RTX_FRAME_RELATED_P (insn) = 1; + npregs--; + } + } + for (i = REG_P7 + 1; i < REG_CC; i++) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + rtx insn; + if (i == REG_A0 || i == REG_A1) + insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1), + gen_rtx_REG (PDImode, i)); + else + insn = emit_move_insn (predec, gen_rtx_REG (SImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Emit code to restore registers in the epilogue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler) +{ + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int total_consec = ndregs_consec + npregs_consec; + int i, regno; + rtx insn; + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. 
*/ + MEM_VOLATILE_P (postinc) = 1; + + for (i = REG_CC - 1; i > REG_P7; i--) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + if (i == REG_A0 || i == REG_A1) + { + rtx mem = gen_rtx_MEM (PDImode, postinc1); + MEM_VOLATILE_P (mem) = 1; + emit_move_insn (gen_rtx_REG (PDImode, i), mem); + } + else + emit_move_insn (gen_rtx_REG (SImode, i), postinc); + } + + regno = REG_P5 - npregs_consec; + for (; npregs != npregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + npregs--; + } + } + regno = REG_R7 - ndregs_consec; + for (; ndregs != ndregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + ndregs--; + } + } + + if (total_consec != 0) + { + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1)); + XVECEXP (pat, 0, 0) + = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (total_consec * 4))); + + if (npregs_consec > 0) + regno = REG_P5 + 1; + else + regno = REG_R7 + 1; + + for (i = 0; i < total_consec; i++) + { + rtx addr = (i > 0 + ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4)) + : spreg); + rtx memref = gen_rtx_MEM (word_mode, addr); + + regno--; + XVECEXP (pat, 0, i + 1) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref); + + if (npregs_consec > 0) + { + if (--npregs_consec == 0) + regno = REG_R7 + 1; + } + } + + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (saveall || is_inthandler) + { + for (regno = REG_LB1; regno >= REG_LT0; regno--) + if (! crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1))) + emit_move_insn (gen_rtx_REG (SImode, regno), postinc); + + emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc); + } +} + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. + + CUM is as above. + + MODE and TYPE are the mode and type of the current parameter. + + PRETEND_SIZE is a variable that should be set to the amount of stack + that must be pushed by the prolog to pretend that our caller pushed + it. + + Normally, this macro will push all remaining incoming registers on the + stack and set PRETEND_SIZE to the length of the registers pushed. + + Blackfin specific : + - VDSP C compiler manual (our ABI) says that a variable args function + should save the R0, R1 and R2 registers in the stack. + - The caller will always leave space on the stack for the + arguments that are passed in registers, so we dont have + to leave any extra space. + - now, the vastart pointer can access all arguments from the stack. */ + +static void +setup_incoming_varargs (cumulative_args_t cum, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, int *pretend_size, + int no_rtl) +{ + rtx mem; + int i; + + if (no_rtl) + return; + + /* The move for named arguments will be generated automatically by the + compiler. We need to generate the move rtx for the unnamed arguments + if they are in the first 3 words. We assume at least 1 named argument + exists, so we never generate [ARGP] = R0 here. 
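(setup_incoming_varargs above is what makes ordinary C varargs work under this ABI: the named words go in R0–R2 as usual, and the remaining argument registers are stored into the stack slots the caller already reserved so that va_arg can walk everything from memory. A plain C usage example of the situation being handled — nothing target-specific in the source itself:

#include <stdarg.h>
#include <stdio.h>

static int sum (int count, ...)       /* "count" is the only named argument */
{
  va_list ap;
  int total = 0;

  va_start (ap, count);
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);        /* reads the spilled register/stack arguments */
  va_end (ap);
  return total;
}

int main (void)
{
  printf ("%d\n", sum (3, 10, 20, 30));   /* 60 */
  return 0;
}
)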
*/ + + for (i = get_cumulative_args (cum)->words + 1; i < max_arg_registers; i++) + { + mem = gen_rtx_MEM (Pmode, + plus_constant (Pmode, arg_pointer_rtx, + (i * UNITS_PER_WORD))); + emit_move_insn (mem, gen_rtx_REG (Pmode, i)); + } + + *pretend_size = 0; +} + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may + be accessed via the stack pointer) in functions that seem suitable. */ + +static bool +bfin_frame_pointer_required (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + + if (fkind != SUBROUTINE) + return true; + + /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used, + so we have to override it for non-leaf functions. */ + if (TARGET_OMIT_LEAF_FRAME_POINTER && ! crtl->is_leaf) + return true; + + return false; +} + +/* Return the number of registers pushed during the prologue. */ + +static int +n_regs_saved_by_prologue (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + bool is_inthandler = fkind != SUBROUTINE; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE + || (is_inthandler && !crtl->is_leaf)); + int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false); + int n = ndregs + npregs; + int i; + + if (all || stack_frame_needed_p ()) + n += 2; + else + { + if (must_save_fp_p ()) + n++; + if (must_save_rets_p ()) + n++; + } + + if (fkind != SUBROUTINE || all) + { + /* Increment once for ASTAT. */ + n++; + if (! crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber) + { + n += 6; + } + } + + if (fkind != SUBROUTINE) + { + /* RETE/X/N. */ + if (lookup_attribute ("nesting", attrs)) + n++; + } + + for (i = REG_P7 + 1; i < REG_CC; i++) + if (all + || (fkind != SUBROUTINE + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + n += i == REG_A0 || i == REG_A1 ? 2 : 1; + + return n; +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All other eliminations are valid. */ + +static bool +bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true); +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +bfin_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT offset = 0; + + if (from == ARG_POINTER_REGNUM) + offset = n_regs_saved_by_prologue () * 4; + + if (to == STACK_POINTER_REGNUM) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + offset += crtl->outgoing_args_size; + else if (crtl->outgoing_args_size) + offset += FIXED_STACK_AREA; + + offset += get_frame_size (); + } + + return offset; +} + +/* Emit code to load a constant CONSTANT into register REG; setting + RTX_FRAME_RELATED_P on all insns we generate if RELATED is true. + Make sure that the insns we generate need not be split. 
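(bfin_initial_elimination_offset above combines three pieces: the prologue save area, the outgoing-argument area (bumped up to FIXED_STACK_AREA whenever any outgoing arguments exist), and the local frame. A worked host-side model with invented numbers; the FIXED_STACK_AREA value used here is an assumption for the example, not the target's real constant:

#include <stdio.h>

#define FIXED_STACK_AREA 12          /* assumption, for illustration only */

static long elimination_offset (int regs_saved, long outgoing_args, long frame_size)
{
  long offset = regs_saved * 4;                         /* saved registers   */
  offset += outgoing_args >= FIXED_STACK_AREA
            ? outgoing_args
            : (outgoing_args ? FIXED_STACK_AREA : 0);   /* outgoing arg area */
  return offset + frame_size;                           /* locals            */
}

int main (void)
{
  printf ("%ld\n", elimination_offset (3, 8, 16));      /* 3*4 + 12 + 16 = 40 */
  return 0;
}
)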
*/ + +static void +frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related) +{ + rtx insn; + rtx cst = GEN_INT (constant); + + if (constant >= -32768 && constant < 65536) + insn = emit_move_insn (reg, cst); + else + { + /* We don't call split_load_immediate here, since dwarf2out.c can get + confused about some of the more clever sequences it can generate. */ + insn = emit_insn (gen_movsi_high (reg, cst)); + if (related) + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movsi_low (reg, reg, cst)); + } + if (related) + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate efficient code to add a value to a P register. + Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p) +{ + if (value == 0) + return; + + /* Choose whether to use a sequence using a temporary register, or + a sequence with multiple adds. We can add a signed 7-bit value + in one instruction. */ + if (value > 120 || value < -120) + { + rtx tmpreg; + rtx tmpreg2; + rtx insn; + + tmpreg2 = NULL_RTX; + + /* For prologue or normal epilogue, P1 can be safely used + as the temporary register. For sibcall epilogue, we try to find + a call used P register, which will be restored in epilogue. + If we cannot find such a P register, we have to use one I register + to help us. */ + + if (epilogue_p >= 0) + tmpreg = gen_rtx_REG (SImode, REG_P1); + else + { + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (!TARGET_FDPIC + && i == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY + && ! crtl->is_leaf)))) + break; + if (i <= REG_P5) + tmpreg = gen_rtx_REG (SImode, i); + else + { + tmpreg = gen_rtx_REG (SImode, REG_P1); + tmpreg2 = gen_rtx_REG (SImode, REG_I0); + emit_move_insn (tmpreg2, tmpreg); + } + } + + if (frame) + frame_related_constant_load (tmpreg, value, TRUE); + else + insn = emit_move_insn (tmpreg, GEN_INT (value)); + + insn = emit_insn (gen_addsi3 (reg, reg, tmpreg)); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + + if (tmpreg2 != NULL_RTX) + emit_move_insn (tmpreg, tmpreg2); + } + else + do + { + int size = value; + rtx insn; + + if (size > 60) + size = 60; + else if (size < -60) + /* We could use -62, but that would leave the stack unaligned, so + it's no good. */ + size = -60; + + insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + value -= size; + } + while (value != 0); +} + +/* Generate a LINK insn for a frame sized FRAME_SIZE. If this constant + is too large, generate a sequence of insns that has the same effect. + SPREG contains (reg:SI REG_SP). */ + +static void +emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size) +{ + HOST_WIDE_INT link_size = frame_size; + rtx insn; + int i; + + if (link_size > 262140) + link_size = 262140; + + /* Use a LINK insn with as big a constant as possible, then subtract + any remaining size from the SP. 
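(add_to_reg above picks between two strategies: adjustments within ±120 are done as one or two signed 7-bit adds capped at ±60 (±62 would leave the stack misaligned), while anything larger goes through a temporary P register. A small stand-alone model of that decision, printing a pseudo-trace instead of emitting insns:

#include <stdio.h>

static void add_in_chunks (long value)
{
  if (value > 120 || value < -120)
    {
      printf ("  load %ld into a temporary P register, then one add\n", value);
      return;
    }
  while (value != 0)
    {
      long step = value > 60 ? 60 : (value < -60 ? -60 : value);
      printf ("  add SP, %ld\n", step);
      value -= step;
    }
}

int main (void)
{
  add_in_chunks (-96);     /* two short adds: -60 then -36 */
  add_in_chunks (-4096);   /* too large: goes through a temporary register */
  return 0;
}
)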
*/ + insn = emit_insn (gen_link (GEN_INT (-8 - link_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx set = XVECEXP (PATTERN (insn), 0, i); + gcc_assert (GET_CODE (set) == SET); + RTX_FRAME_RELATED_P (set) = 1; + } + + frame_size -= link_size; + + if (frame_size > 0) + { + /* Must use a call-clobbered PREG that isn't the static chain. */ + rtx tmpreg = gen_rtx_REG (Pmode, REG_P1); + + frame_related_constant_load (tmpreg, -frame_size, TRUE); + insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Return the number of bytes we must reserve for outgoing arguments + in the current function's stack frame. */ + +static HOST_WIDE_INT +arg_area_size (void) +{ + if (crtl->outgoing_args_size) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + return crtl->outgoing_args_size; + else + return FIXED_STACK_AREA; + } + return 0; +} + +/* Save RETS and FP, and allocate a stack frame. ALL is true if the + function must save all its registers (true only for certain interrupt + handlers). */ + +static void +do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all) +{ + frame_size += arg_area_size (); + + if (all + || stack_frame_needed_p () + || (must_save_rets_p () && must_save_fp_p ())) + emit_link_insn (spreg, frame_size); + else + { + if (must_save_rets_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + bfin_rets_rtx); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (must_save_fp_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + gen_rtx_REG (Pmode, REG_FP)); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + add_to_reg (spreg, -frame_size, 1, 0); + } +} + +/* Like do_link, but used for epilogues to deallocate the stack frame. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p) +{ + frame_size += arg_area_size (); + + if (stack_frame_needed_p ()) + emit_insn (gen_unlink ()); + else + { + rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg)); + + add_to_reg (spreg, frame_size, 0, epilogue_p); + if (all || must_save_fp_p ()) + { + rtx fpreg = gen_rtx_REG (Pmode, REG_FP); + emit_move_insn (fpreg, postinc); + emit_use (fpreg); + } + if (all || must_save_rets_p ()) + { + emit_move_insn (bfin_rets_rtx, postinc); + emit_use (bfin_rets_rtx); + } + } +} + +/* Generate a prologue suitable for a function of kind FKIND. This is + called for interrupt and exception handler prologues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + rtx insn; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + tree kspisusp = lookup_attribute ("kspisusp", attrs); + + if (kspisusp) + { + insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* We need space on the stack in case we need to save the argument + registers. 
*/ + if (fkind == EXCPT_HANDLER) + { + insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save everything here. */ + if (!crtl->is_leaf) + all = true; + expand_prologue_reg_save (spreg, all, true); + + if (ENABLE_WA_05000283 || ENABLE_WA_05000315) + { + rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode)); + rtx p5reg = gen_rtx_REG (Pmode, REG_P5); + emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx)); + emit_insn (gen_movsi_high (p5reg, chipid)); + emit_insn (gen_movsi_low (p5reg, p5reg, chipid)); + emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx)); + } + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + insn = emit_move_insn (predec, srcreg); + RTX_FRAME_RELATED_P (insn) = 1; + } + + do_link (spreg, frame_size, all); + + if (fkind == EXCPT_HANDLER) + { + rtx r0reg = gen_rtx_REG (SImode, REG_R0); + rtx r1reg = gen_rtx_REG (SImode, REG_R1); + rtx r2reg = gen_rtx_REG (SImode, REG_R2); + + emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT)); + emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26))); + emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26))); + emit_move_insn (r1reg, spreg); + emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP)); + emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8))); + } +} + +/* Generate an epilogue suitable for a function of kind FKIND. This is + called for interrupt and exception handler epilogues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all) +{ + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. */ + MEM_VOLATILE_P (postinc) = 1; + + do_unlink (spreg, get_frame_size (), all, 1); + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + emit_move_insn (srcreg, postinc); + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save (and restore) everything here. */ + if (!crtl->is_leaf) + all = true; + + expand_epilogue_reg_restore (spreg, all, true); + + /* Deallocate any space we left on the stack in case we needed to save the + argument registers. */ + if (fkind == EXCPT_HANDLER) + emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind]))); +} + +/* Used while emitting the prologue to generate code to load the correct value + into the PIC register, which is passed in DEST. */ + +static rtx +bfin_load_pic_reg (rtx dest) +{ + struct cgraph_local_info *i = NULL; + rtx addr; + + i = cgraph_local_info (current_function_decl); + + /* Functions local to the translation unit don't need to reload the + pic reg, since the caller always passes a usable one. */ + if (i && i->local) + return pic_offset_table_rtx; + + if (global_options_set.x_bfin_library_id) + addr = plus_constant (Pmode, pic_offset_table_rtx, + -4 - bfin_library_id * 4); + else + addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_LIBRARY_OFFSET)); + emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr))); + return dest; +} + +/* Generate RTL for the prologue of the current function. 
*/ + +void +bfin_expand_prologue (void) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + rtx pic_reg_loaded = NULL_RTX; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_prologue (spreg, fkind, all); + return; + } + + if (crtl->limit_stack + || (TARGET_STACK_CHECK_L1 + && !DECL_NO_LIMIT_STACK (current_function_decl))) + { + HOST_WIDE_INT offset + = bfin_initial_elimination_offset (ARG_POINTER_REGNUM, + STACK_POINTER_REGNUM); + rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX; + rtx tmp = gen_rtx_REG (Pmode, REG_R3); + rtx p2reg = gen_rtx_REG (Pmode, REG_P2); + + emit_move_insn (tmp, p2reg); + if (!lim) + { + emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode)); + emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg)); + lim = p2reg; + } + if (GET_CODE (lim) == SYMBOL_REF) + { + if (TARGET_ID_SHARED_LIBRARY) + { + rtx p1reg = gen_rtx_REG (Pmode, REG_P1); + rtx val; + pic_reg_loaded = bfin_load_pic_reg (p2reg); + val = legitimize_pic_address (stack_limit_rtx, p1reg, + pic_reg_loaded); + emit_move_insn (p1reg, val); + frame_related_constant_load (p2reg, offset, FALSE); + emit_insn (gen_addsi3 (p2reg, p2reg, p1reg)); + lim = p2reg; + } + else + { + rtx limit = plus_constant (Pmode, lim, offset); + emit_move_insn (p2reg, limit); + lim = p2reg; + } + } + else + { + if (lim != p2reg) + emit_move_insn (p2reg, lim); + add_to_reg (p2reg, offset, 0, 0); + lim = p2reg; + } + emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim)); + emit_insn (gen_trapifcc ()); + emit_move_insn (p2reg, tmp); + } + expand_prologue_reg_save (spreg, all, false); + + do_link (spreg, frame_size, all); + + if (TARGET_ID_SHARED_LIBRARY + && !TARGET_SEP_DATA + && (crtl->uses_pic_offset_table + || !crtl->is_leaf)) + bfin_load_pic_reg (pic_offset_table_rtx); +} + +/* Generate RTL for the epilogue of the current function. NEED_RETURN is zero + if this is for a sibcall. EH_RETURN is nonzero if we're expanding an + eh_return pattern. SIBCALL_P is true if this is a sibcall epilogue, + false otherwise. */ + +void +bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p) +{ + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + int e = sibcall_p ? -1 : 1; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_epilogue (spreg, fkind, all); + return; + } + + do_unlink (spreg, get_frame_size (), all, e); + + expand_epilogue_reg_restore (spreg, all, false); + + /* Omit the return insn if this is for a sibcall. */ + if (! need_return) + return; + + if (eh_return) + emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS))); +} + +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. 
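(funkind and the interrupt prologue/epilogue code above are driven by function attributes on the handler's type. A usage sketch — only meaningful to a Blackfin-targeted GCC, and which attribute combination is appropriate depends on the application:

void __attribute__ ((interrupt_handler))            /* classified as INTERRUPT_HANDLER */
timer_isr (void)
{
  /* ... service the interrupt ... */
}

void __attribute__ ((interrupt_handler, nesting))   /* prologue also saves the return register */
uart_isr (void)
{
  /* ... */
}

void __attribute__ ((exception_handler, saveall))   /* classified as EXCPT_HANDLER; save everything */
fault_handler (void)
{
  /* ... */
}
)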
*/ + + if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE + && !df_regs_ever_live_p (new_reg)) + return 0; + + return 1; +} + +/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */ +static void +bfin_extra_live_on_entry (bitmap regs) +{ + if (TARGET_FDPIC) + bitmap_set_bit (regs, FDPIC_REGNO); +} + +/* Return the value of the return address for the frame COUNT steps up + from the current frame, after the prologue. + We punt for everything but the current frame by returning const0_rtx. */ + +rtx +bfin_return_addr_rtx (int count) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RETS); +} + +static rtx +bfin_delegitimize_address (rtx orig_x) +{ + rtx x = orig_x; + + if (GET_CODE (x) != MEM) + return orig_x; + + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC + && GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) + return XVECEXP (XEXP (x, 1), 0, 0); + + return orig_x; +} + +/* This predicate is used to compute the length of a load/store insn. + OP is a MEM rtx, we return nonzero if its addressing mode requires a + 32-bit instruction. */ + +int +effective_address_32bit_p (rtx op, enum machine_mode mode) +{ + HOST_WIDE_INT offset; + + mode = GET_MODE (op); + op = XEXP (op, 0); + + if (GET_CODE (op) != PLUS) + { + gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC + || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC); + return 0; + } + + if (GET_CODE (XEXP (op, 1)) == UNSPEC) + return 1; + + offset = INTVAL (XEXP (op, 1)); + + /* All byte loads use a 16-bit offset. */ + if (GET_MODE_SIZE (mode) == 1) + return 1; + + if (GET_MODE_SIZE (mode) == 4) + { + /* Frame pointer relative loads can use a negative offset, all others + are restricted to a small positive one. */ + if (XEXP (op, 0) == frame_pointer_rtx) + return offset < -128 || offset > 60; + return offset < 0 || offset > 60; + } + + /* Must be HImode now. */ + return offset < 0 || offset > 30; +} + +/* Returns true if X is a memory reference using an I register. */ +bool +bfin_dsp_memref_p (rtx x) +{ + if (! MEM_P (x)) + return false; + x = XEXP (x, 0); + if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC) + x = XEXP (x, 0); + return IREG_P (x); +} + +/* Return cost of the memory address ADDR. + All addressing modes are equally cheap on the Blackfin. */ + +static int +bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + return 1; +} + +/* Subroutine of print_operand; used to print a memory reference X to FILE. 
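(effective_address_32bit_p above encodes which register+offset forms still fit the 16-bit load/store encodings; anything outside these windows needs the 32-bit form. A host-side restatement of just the offset rules, leaving out the UNSPEC and auto-increment cases:

#include <stdbool.h>
#include <stdio.h>

static bool needs_32bit_form (int mode_size, bool fp_relative, long offset)
{
  if (mode_size == 1)
    return true;                                   /* byte accesses: always the 32-bit form */
  if (mode_size == 4)
    return fp_relative ? (offset < -128 || offset > 60)
                       : (offset < 0 || offset > 60);
  return offset < 0 || offset > 30;                /* HImode */
}

int main (void)
{
  printf ("%d %d %d\n",
          needs_32bit_form (4, true, -64),   /* 0: FP-relative word access fits 16 bits */
          needs_32bit_form (4, false, 64),   /* 1: offset too large */
          needs_32bit_form (2, false, 30));  /* 0: HImode upper limit */
  return 0;
}
)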
*/ + +void +print_address_operand (FILE *file, rtx x) +{ + switch (GET_CODE (x)) + { + case PLUS: + output_address (XEXP (x, 0)); + fprintf (file, "+"); + output_address (XEXP (x, 1)); + break; + + case PRE_DEC: + fprintf (file, "--"); + output_address (XEXP (x, 0)); + break; + case POST_INC: + output_address (XEXP (x, 0)); + fprintf (file, "++"); + break; + case POST_DEC: + output_address (XEXP (x, 0)); + fprintf (file, "--"); + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + print_operand (file, x, 0); + break; + } +} + +/* Adding intp DImode support by Tony + * -- Q: (low word) + * -- R: (high word) + */ + +void +print_operand (FILE *file, rtx x, char code) +{ + enum machine_mode mode; + + if (code == '!') + { + if (GET_MODE (current_output_insn) == SImode) + fprintf (file, " ||"); + else + fprintf (file, ";"); + return; + } + + mode = GET_MODE (x); + + switch (code) + { + case 'j': + switch (GET_CODE (x)) + { + case EQ: + fprintf (file, "e"); + break; + case NE: + fprintf (file, "ne"); + break; + case GT: + fprintf (file, "g"); + break; + case LT: + fprintf (file, "l"); + break; + case GE: + fprintf (file, "ge"); + break; + case LE: + fprintf (file, "le"); + break; + case GTU: + fprintf (file, "g"); + break; + case LTU: + fprintf (file, "l"); + break; + case GEU: + fprintf (file, "ge"); + break; + case LEU: + fprintf (file, "le"); + break; + default: + output_operand_lossage ("invalid %%j value"); + } + break; + + case 'J': /* reverse logic */ + switch (GET_CODE(x)) + { + case EQ: + fprintf (file, "ne"); + break; + case NE: + fprintf (file, "e"); + break; + case GT: + fprintf (file, "le"); + break; + case LT: + fprintf (file, "ge"); + break; + case GE: + fprintf (file, "l"); + break; + case LE: + fprintf (file, "g"); + break; + case GTU: + fprintf (file, "le"); + break; + case LTU: + fprintf (file, "ge"); + break; + case GEU: + fprintf (file, "l"); + break; + case LEU: + fprintf (file, "g"); + break; + default: + output_operand_lossage ("invalid %%J value"); + } + break; + + default: + switch (GET_CODE (x)) + { + case REG: + if (code == 'h') + { + if (REGNO (x) < 32) + fprintf (file, "%s", short_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'd') + { + if (REGNO (x) < 32) + fprintf (file, "%s", high_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'w') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.w", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'x') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.x", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'v') + { + if (REGNO (x) == REG_A0) + fprintf (file, "AV0"); + else if (REGNO (x) == REG_A1) + fprintf (file, "AV1"); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'D') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", dregs_pair_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'H') + { + if ((mode == DImode || mode == DFmode) && REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x) + 1]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'T') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", byte_reg_names[REGNO (x)]); + 
else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else + fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: + fputc ('[', file); + x = XEXP (x,0); + print_address_operand (file, x); + fputc (']', file); + break; + + case CONST_INT: + if (code == 'M') + { + switch (INTVAL (x)) + { + case MACFLAG_NONE: + break; + case MACFLAG_FU: + fputs ("(FU)", file); + break; + case MACFLAG_T: + fputs ("(T)", file); + break; + case MACFLAG_TFU: + fputs ("(TFU)", file); + break; + case MACFLAG_W32: + fputs ("(W32)", file); + break; + case MACFLAG_IS: + fputs ("(IS)", file); + break; + case MACFLAG_IU: + fputs ("(IU)", file); + break; + case MACFLAG_IH: + fputs ("(IH)", file); + break; + case MACFLAG_M: + fputs ("(M)", file); + break; + case MACFLAG_IS_M: + fputs ("(IS,M)", file); + break; + case MACFLAG_ISS2: + fputs ("(ISS2)", file); + break; + case MACFLAG_S2RND: + fputs ("(S2RND)", file); + break; + default: + gcc_unreachable (); + } + break; + } + else if (code == 'b') + { + if (INTVAL (x) == 0) + fputs ("+=", file); + else if (INTVAL (x) == 1) + fputs ("-=", file); + else + gcc_unreachable (); + break; + } + /* Moves to half registers with d or h modifiers always use unsigned + constants. */ + else if (code == 'd') + x = GEN_INT ((INTVAL (x) >> 16) & 0xffff); + else if (code == 'h') + x = GEN_INT (INTVAL (x) & 0xffff); + else if (code == 'N') + x = GEN_INT (-INTVAL (x)); + else if (code == 'X') + x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x))); + else if (code == 'Y') + x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x))); + else if (code == 'Z') + /* Used for LINK insns. */ + x = GEN_INT (-8 - INTVAL (x)); + + /* fall through */ + + case SYMBOL_REF: + output_addr_const (file, x); + break; + + case CONST_DOUBLE: + output_operand_lossage ("invalid const_double operand"); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_MOVE_PIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT"); + break; + + case UNSPEC_MOVE_FDPIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT17M4"); + break; + + case UNSPEC_FUNCDESC_GOT17M4: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@FUNCDESC_GOT17M4"); + break; + + case UNSPEC_LIBRARY_OFFSET: + fprintf (file, "_current_shared_library_p5_offset_"); + break; + + default: + gcc_unreachable (); + } + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Argument support functions. */ + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + VDSP C Compiler manual, our ABI says that + first 3 words of arguments will use R0, R1 and R2. +*/ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, + rtx libname ATTRIBUTE_UNUSED) +{ + static CUMULATIVE_ARGS zero_cum; + + *cum = zero_cum; + + /* Set up the number of registers to use for passing arguments. */ + + cum->nregs = max_arg_registers; + cum->arg_regs = arg_regs; + + cum->call_cookie = CALL_NORMAL; + /* Check for a longcall attribute. */ + if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_SHORT; + else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_LONG; + + return; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) 
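(init_cumulative_args above turns the "shortcall"/"longcall" attributes into call_cookie bits, which later decide whether a direct CALL may be emitted. A usage sketch, again only meaningful to a Blackfin-targeted GCC; the declarations and comments on the generated code are illustrative:

extern void far_away_handler (void) __attribute__ ((longcall));   /* force an indirect call */
extern void nearby_helper (void)    __attribute__ ((shortcall));  /* force a direct CALL    */

void dispatch (int fault)
{
  if (fault)
    far_away_handler ();   /* expected form: load the address into a P register, CALL (Pn) */
  else
    nearby_helper ();      /* expected form: pc-relative CALL */
}
)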
*/ + +static void +bfin_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int count, bytes, words; + + bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + cum->words += words; + cum->nregs -= words; + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->arg_regs = NULL; + } + else + { + for (count = 1; count <= words; count++) + cum->arg_regs++; + } + + return; +} + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ + +static rtx +bfin_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + + if (mode == VOIDmode) + /* Compute operand 2 of the call insn. */ + return GEN_INT (cum->call_cookie); + + if (bytes == -1) + return NULL_RTX; + + if (cum->nregs) + return gen_rtx_REG (mode, *(cum->arg_regs)); + + return NULL_RTX; +} + +/* For an arg passed partly in registers and partly in memory, + this is the number of bytes passed in registers. + For args passed entirely in registers or entirely in memory, zero. + + Refer VDSP C Compiler manual, our ABI. + First 3 words are in registers. So, if an argument is larger + than the registers available, it will span the register and + stack. */ + +static int +bfin_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + int bytes_left = get_cumulative_args (cum)->nregs * UNITS_PER_WORD; + + if (bytes == -1) + return 0; + + if (bytes_left == 0) + return 0; + if (bytes > bytes_left) + return bytes_left; + return 0; +} + +/* Variable sized types are passed by reference. */ + +static bool +bfin_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Decide whether a type should be returned in memory (true) + or in a register (false). This is called by the macro + TARGET_RETURN_IN_MEMORY. */ + +static bool +bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + int size = int_size_in_bytes (type); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Register in which address to store a structure value + is passed to a function. */ +static rtx +bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, REG_P0); +} + +/* Return true when register may be used to pass function parameters. 
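(bfin_return_in_memory above sends any aggregate larger than two words, or of unknown size, back through memory, with bfin_struct_value_rtx putting the return-slot address in P0. A worked example assuming 4-byte int; the struct names are invented:

#include <stdio.h>

struct pair   { int a, b; };       /* 8 bytes: returned in registers */
struct triple { int a, b, c; };    /* 12 bytes: returned in memory, return-slot address in P0 */

int main (void)
{
  printf ("pair=%zu triple=%zu\n", sizeof (struct pair), sizeof (struct triple));
  return 0;
}
)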
*/ + +bool +function_arg_regno_p (int n) +{ + int i; + for (i = 0; arg_regs[i] != -1; i++) + if (n == arg_regs[i]) + return true; + return false; +} + +/* Returns 1 if OP contains a symbol reference */ + +int +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return 1; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return 1; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return 1; + } + + return 0; +} + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) +{ + struct cgraph_local_info *this_func, *called_func; + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + if (fkind != SUBROUTINE) + return false; + if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA) + return true; + + /* When compiling for ID shared libraries, can't sibcall a local function + from a non-local function, because the local function thinks it does + not need to reload P5 in the prologue, but the sibcall wil pop P5 in the + sibcall epilogue, and we end up with the wrong value in P5. */ + + if (!decl) + /* Not enough information. */ + return false; + + this_func = cgraph_local_info (current_function_decl); + called_func = cgraph_local_info (decl); + if (!called_func) + return false; + return !called_func->local || this_func->local; +} + +/* Write a template for a trampoline to F. */ + +static void +bfin_asm_trampoline_template (FILE *f) +{ + if (TARGET_FDPIC) + { + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0xac4b\n"); /* p3 = [p1 + 4] */ + fprintf (f, "\t.dw\t0x9149\n"); /* p1 = [p1] */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } + else + { + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is the target function. CHAIN_VALUE is an RTX for + the static chain value for the function. 
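(The trampoline template above and bfin_trampoline_init that follows come into play whenever the address of a nested function escapes. A standard GNU C example that forces a trampoline — compile with GCC; the nested function reads an enclosing local, so it needs the static chain the trampoline supplies:

#include <stdio.h>

static void apply (void (*fn) (int), int n)
{
  for (int i = 0; i < n; i++)
    fn (i);
}

int main (void)
{
  int total = 0;

  void add (int i)          /* nested function: reaches total through the static chain */
  {
    total += i;
  }

  apply (add, 4);           /* taking add's address materializes a trampoline */
  printf ("%d\n", total);   /* 6 */
  return 0;
}
)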
*/ + +static void +bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0)); + rtx t2 = copy_to_reg (chain_value); + rtx mem; + int i = 0; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_FDPIC) + { + rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (m_tramp, 0), 8)); + mem = adjust_address (m_tramp, Pmode, 0); + emit_move_insn (mem, a); + i = 8; + } + + mem = adjust_address (m_tramp, HImode, i + 2); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 6); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + + mem = adjust_address (m_tramp, HImode, i + 10); + emit_move_insn (mem, gen_lowpart (HImode, t2)); + emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 14); + emit_move_insn (mem, gen_lowpart (HImode, t2)); +} + +/* Emit insns to move operands[1] into operands[0]. */ + +void +emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + + gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed)); + if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + else + operands[1] = legitimize_pic_address (operands[1], temp, + TARGET_FDPIC ? OUR_FDPIC_REG + : pic_offset_table_rtx); +} + +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. */ + +bool +expand_move (rtx *operands, enum machine_mode mode) +{ + rtx op = operands[1]; + if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC) + && SYMBOLIC_CONST (op)) + emit_pic_move (operands, mode); + else if (mode == SImode && GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && !targetm.legitimate_constant_p (mode, op)) + { + rtx dest = operands[0]; + rtx op0, op1; + gcc_assert (!reload_in_progress && !reload_completed); + op = XEXP (op, 0); + op0 = force_reg (mode, XEXP (op, 0)); + op1 = XEXP (op, 1); + if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode)) + op1 = force_reg (mode, op1); + if (GET_CODE (dest) == MEM) + dest = gen_reg_rtx (mode); + emit_insn (gen_addsi3 (dest, op0, op1)); + if (dest == operands[0]) + return true; + operands[1] = dest; + } + /* Don't generate memory->memory or constant->memory moves, go through a + register */ + else if ((reload_in_progress | reload_completed) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) != REG) + operands[1] = force_reg (mode, operands[1]); + return false; +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +{ + while (num--) + { + rtx op = operands[num]; + + /* simplify_subreg refuse to split volatile memory addresses, + but we still have to handle it. 
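A standalone sketch, not taken from the GCC sources above, of the halfword patching performed by bfin_trampoline_init for the non-FDPIC template. It assumes the .dd template words are stored little-endian, so that each 16-bit immediate placeholder sits at the byte offsets 2, 6, 10 and 14 that the patch writes to.

#include <stdint.h>
#include <stdio.h>

/* Overwrite the 0x0000 immediate fields of the trampoline template with
   the split halves of the function address and static chain value.  */
static void
patch_trampoline (uint16_t tramp[9], uint32_t fnaddr, uint32_t chain)
{
  tramp[1] = fnaddr & 0xffff;   /* p1.l = fn low   (byte offset 2)  */
  tramp[3] = fnaddr >> 16;      /* p1.h = fn high  (byte offset 6)  */
  tramp[5] = chain & 0xffff;    /* p2.l = sc low   (byte offset 10) */
  tramp[7] = chain >> 16;       /* p2.h = sc high  (byte offset 14) */
}

int
main (void)
{
  /* The non-FDPIC template: four load-immediate words plus "jump (p1)".  */
  uint16_t tramp[9] = { 0xe109, 0, 0xe149, 0, 0xe10a, 0, 0xe14a, 0, 0x0051 };

  patch_trampoline (tramp, 0x12345678u, 0x00ffc000u);
  printf ("%04x %04x\n", (unsigned) tramp[1], (unsigned) tramp[3]);  /* 5678 1234 */
  return 0;
}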
*/ + if (GET_CODE (op) == MEM) + { + lo_half[num] = adjust_address (op, SImode, 0); + hi_half[num] = adjust_address (op, SImode, 4); + } + else + { + lo_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 0); + hi_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 4); + } + } +} + +bool +bfin_longcall_p (rtx op, int call_cookie) +{ + gcc_assert (GET_CODE (op) == SYMBOL_REF); + if (SYMBOL_REF_WEAK (op)) + return 1; + if (call_cookie & CALL_SHORT) + return 0; + if (call_cookie & CALL_LONG) + return 1; + if (TARGET_LONG_CALLS) + return 1; + return 0; +} + +/* Expand a call instruction. FNADDR is the call target, RETVAL the return value. + COOKIE is a CONST_INT holding the call_cookie prepared init_cumulative_args. + SIBCALL is nonzero if this is a sibling call. */ + +void +bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall) +{ + rtx use = NULL, call; + rtx callee = XEXP (fnaddr, 0); + int nelts = 3; + rtx pat; + rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO); + rtx retsreg = gen_rtx_REG (Pmode, REG_RETS); + int n; + + /* In an untyped call, we can get NULL for operand 2. */ + if (cookie == NULL_RTX) + cookie = const0_rtx; + + /* Static functions and indirect calls don't need the pic register. */ + if (!TARGET_FDPIC && flag_pic + && GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee)) + use_reg (&use, pic_offset_table_rtx); + + if (TARGET_FDPIC) + { + int caller_in_sram, callee_in_sram; + + /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */ + caller_in_sram = callee_in_sram = 0; + + if (lookup_attribute ("l1_text", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 1; + else if (lookup_attribute ("l2", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 2; + + if (GET_CODE (callee) == SYMBOL_REF + && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))) + { + if (lookup_attribute + ("l1_text", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 1; + else if (lookup_attribute + ("l2", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 2; + } + + if (GET_CODE (callee) != SYMBOL_REF + || bfin_longcall_p (callee, INTVAL (cookie)) + || (GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee) + && TARGET_INLINE_PLT) + || caller_in_sram != callee_in_sram + || (caller_in_sram && callee_in_sram + && (GET_CODE (callee) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (callee)))) + { + rtx addr = callee; + if (! 
address_operand (addr, Pmode)) + addr = force_reg (Pmode, addr); + + fnaddr = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (fnaddr, addr)); + fnaddr = gen_rtx_MEM (Pmode, fnaddr); + + picreg = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (picreg, + plus_constant (Pmode, addr, 4))); + } + + nelts++; + } + else if ((!register_no_elim_operand (callee, Pmode) + && GET_CODE (callee) != SYMBOL_REF) + || (GET_CODE (callee) == SYMBOL_REF + && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY) + || bfin_longcall_p (callee, INTVAL (cookie))))) + { + callee = copy_to_mode_reg (Pmode, callee); + fnaddr = gen_rtx_MEM (Pmode, callee); + } + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + + if (retval) + call = gen_rtx_SET (VOIDmode, retval, call); + + pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts)); + n = 0; + XVECEXP (pat, 0, n++) = call; + if (TARGET_FDPIC) + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg); + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie); + if (sibcall) + XVECEXP (pat, 0, n++) = ret_rtx; + else + XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg); + call = emit_call_insn (pat); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +int +hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* Allow only dregs to store value of mode HI or QI */ + enum reg_class rclass = REGNO_REG_CLASS (regno); + + if (mode == CCmode) + return 0; + + if (mode == V2HImode) + return D_REGNO_P (regno); + if (rclass == CCREGS) + return mode == BImode; + if (mode == PDImode || mode == V2PDImode) + return regno == REG_A0 || regno == REG_A1; + + /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes + up with a bad register class (such as ALL_REGS) for DImode. */ + if (mode == DImode) + return regno < REG_M3; + + if (mode == SImode + && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno)) + return 1; + + return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno); +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +bfin_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V2HImode; +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. */ + +static int +bfin_register_move_cost (enum machine_mode mode, + reg_class_t class1, reg_class_t class2) +{ + /* These need secondary reloads, so they're more expensive. */ + if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS)) + || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS))) + return 4; + + /* If optimizing for size, always prefer reg-reg over reg-memory moves. */ + if (optimize_size) + return 2; + + if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* Discourage trying to use the accumulators. */ + if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1)) + return 20; + } + return 2; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. + + ??? In theory L1 memory has single-cycle latency. We should add a switch + that tells the compiler whether we expect to use only L1 memory for the + program; it'll make the costs more accurate. */ + +static int +bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass, + bool in ATTRIBUTE_UNUSED) +{ + /* Make memory accesses slightly more expensive than any register-register + move. 
Also, penalize non-DP registers, since they need secondary + reloads to load and store. */ + if (! reg_class_subset_p (rclass, DPREGS)) + return 10; + + return 8; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch register. Return the class needed for the + scratch register. */ + +static reg_class_t +bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + /* If we have HImode or QImode, we can only use DREGS as secondary registers; + in most other cases we can also use PREGS. */ + enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS; + enum reg_class x_class = NO_REGS; + enum rtx_code code = GET_CODE (x); + enum reg_class rclass = (enum reg_class) rclass_i; + + if (code == SUBREG) + x = SUBREG_REG (x), code = GET_CODE (x); + if (REG_P (x)) + { + int regno = REGNO (x); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = reg_renumber[regno]; + + if (regno == -1) + code = MEM; + else + x_class = REGNO_REG_CLASS (regno); + } + + /* We can be asked to reload (plus (FP) (large_constant)) into a DREG. + This happens as a side effect of register elimination, and we need + a scratch register to do it. */ + if (fp_plus_const_operand (x, mode)) + { + rtx op2 = XEXP (x, 1); + int large_constant_p = ! satisfies_constraint_Ks7 (op2); + + if (rclass == PREGS || rclass == PREGS_CLOBBERED) + return NO_REGS; + /* If destination is a DREG, we can do this without a scratch register + if the constant is valid for an add instruction. */ + if ((rclass == DREGS || rclass == DPREGS) + && ! large_constant_p) + return NO_REGS; + /* Reloading to anything other than a DREG? Use a PREG scratch + register. */ + sri->icode = CODE_FOR_reload_insi; + return NO_REGS; + } + + /* Data can usually be moved freely between registers of most classes. + AREGS are an exception; they can only move to or from another register + in AREGS or one in DREGS. They can also be assigned the constant 0. */ + if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS) + return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS + || rclass == ODD_AREGS + ? NO_REGS : DREGS); + + if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS) + { + if (code == MEM) + { + sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi; + return NO_REGS; + } + + if (x != const0_rtx && x_class != DREGS) + { + return DREGS; + } + else + return NO_REGS; + } + + /* CCREGS can only be moved from/to DREGS. */ + if (rclass == CCREGS && x_class != DREGS) + return DREGS; + if (x_class == CCREGS && rclass != DREGS) + return DREGS; + + /* All registers other than AREGS can load arbitrary constants. The only + case that remains is MEM. */ + if (code == MEM) + if (! reg_class_subset_p (rclass, default_class)) + return default_class; + + return NO_REGS; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ + +static bool +bfin_class_likely_spilled_p (reg_class_t rclass) +{ + switch (rclass) + { + case PREGS_CLOBBERED: + case PROLOGUE_REGS: + case P0REGS: + case D0REGS: + case D1REGS: + case D2REGS: + case CCREGS: + return true; + + default: + break; + } + + return false; +} + +static struct machine_function * +bfin_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +bfin_option_override (void) +{ + /* If processor type is not specified, enable all workarounds. 
*/ + if (bfin_cpu_type == BFIN_CPU_UNKNOWN) + { + int i; + + for (i = 0; bfin_cpus[i].name != NULL; i++) + bfin_workarounds |= bfin_cpus[i].workarounds; + + bfin_si_revision = 0xffff; + } + + if (bfin_csync_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_SYNCS; + else if (bfin_csync_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_SYNCS; + + if (bfin_specld_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_LOADS; + else if (bfin_specld_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_LOADS; + + if (TARGET_OMIT_LEAF_FRAME_POINTER) + flag_omit_frame_pointer = 1; + +#ifdef SUBTARGET_FDPIC_NOT_SUPPORTED + if (TARGET_FDPIC) + error ("-mfdpic is not supported, please use a bfin-linux-uclibc target"); +#endif + + /* Library identification */ + if (global_options_set.x_bfin_library_id && ! TARGET_ID_SHARED_LIBRARY) + error ("-mshared-library-id= specified without -mid-shared-library"); + + if (stack_limit_rtx && TARGET_FDPIC) + { + warning (0, "-fstack-limit- options are ignored with -mfdpic; use -mstack-check-l1"); + stack_limit_rtx = NULL_RTX; + } + + if (stack_limit_rtx && TARGET_STACK_CHECK_L1) + error ("can%'t use multiple stack checking methods together"); + + if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC) + error ("ID shared libraries and FD-PIC mode can%'t be used together"); + + /* Don't allow the user to specify -mid-shared-library and -msep-data + together, as it makes little sense from a user's point of view... */ + if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY) + error ("cannot specify both -msep-data and -mid-shared-library"); + /* ... internally, however, it's nearly the same. */ + if (TARGET_SEP_DATA) + target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY; + + if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0) + flag_pic = 1; + + /* There is no single unaligned SI op for PIC code. Sometimes we + need to use ".4byte" and sometimes we need to use ".picptr". + See bfin_assemble_integer for details. */ + if (TARGET_FDPIC) + targetm.asm_out.unaligned_op.si = 0; + + /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries, + since we don't support it and it'll just break. */ + if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY) + flag_pic = 0; + + if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561) + error ("-mmulticore can only be used with BF561"); + + if (TARGET_COREA && !TARGET_MULTICORE) + error ("-mcorea should be used with -mmulticore"); + + if (TARGET_COREB && !TARGET_MULTICORE) + error ("-mcoreb should be used with -mmulticore"); + + if (TARGET_COREA && TARGET_COREB) + error ("-mcorea and -mcoreb can%'t be used together"); + + flag_schedule_insns = 0; + + init_machine_status = bfin_init_machine_status; +} + +/* Return the destination address of BRANCH. + We need to use this instead of get_attr_length, because the + cbranch_with_nops pattern conservatively sets its length to 6, and + we still prefer to use shorter sequences. */ + +static int +branch_dest (rtx branch) +{ + rtx dest; + int dest_uid; + rtx pat = PATTERN (branch); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_SRC (pat); + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return INSN_ADDRESSES (dest_uid); +} + +/* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates + it's a branch that's predicted taken. 
*/ + +static int +cbranch_predicted_taken_p (rtx insn) +{ + rtx x = find_reg_note (insn, REG_BR_PROB, 0); + + if (x) + { + int pred_val = XINT (x, 0); + + return pred_val >= REG_BR_PROB_BASE / 2; + } + + return 0; +} + +/* Templates for use by asm_conditional_branch. */ + +static const char *ccbranch_templates[][3] = { + { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" }, + { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" }, + { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" }, + { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" }, +}; + +/* Output INSN, which is a conditional branch instruction with operands + OPERANDS. + + We deal with the various forms of conditional branches that can be generated + by bfin_reorg to prevent the hardware from doing speculative loads, by + - emitting a sufficient number of nops, if N_NOPS is nonzero, or + - always emitting the branch as predicted taken, if PREDICT_TAKEN is true. + Either of these is only necessary if the branch is short, otherwise the + template we use ends in an unconditional jump which flushes the pipeline + anyway. */ + +void +asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken) +{ + int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + /* Note : offset for instructions like if cc jmp; jump.[sl] offset + is to be taken from start of if cc rather than jump. + Range for jump.s is (-4094, 4096) instead of (-4096, 4094) + */ + int len = (offset >= -1024 && offset <= 1022 ? 0 + : offset >= -4094 && offset <= 4096 ? 1 + : 2); + int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn); + int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT); + output_asm_insn (ccbranch_templates[idx][len], operands); + gcc_assert (n_nops == 0 || !bp); + if (len == 0) + while (n_nops-- > 0) + output_asm_insn ("nop;", NULL); +} + +/* Emit rtl for a comparison operation CMP in mode MODE. Operands have been + stored in bfin_compare_op0 and bfin_compare_op1 already. */ + +rtx +bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + enum rtx_code code1, code2; + rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1); + rtx tem = bfin_cc_rtx; + enum rtx_code code = GET_CODE (cmp); + + /* If we have a BImode input, then we already have a compare result, and + do not need to emit another comparison. */ + if (GET_MODE (op0) == BImode) + { + gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx); + tem = op0, code2 = code; + } + else + { + switch (code) { + /* bfin has these conditions */ + case EQ: + case LT: + case LE: + case LEU: + case LTU: + code1 = code; + code2 = NE; + break; + default: + code1 = reverse_condition (code); + code2 = EQ; + break; + } + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (code1, BImode, op0, op1))); + } + + return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode)); +} + +/* Return nonzero iff C has exactly one bit set if it is interpreted + as a 32-bit constant. */ + +int +log2constp (unsigned HOST_WIDE_INT c) +{ + c &= 0xFFFFFFFF; + return c != 0 && (c & (c-1)) == 0; +} + +/* Returns the number of consecutive least significant zeros in the binary + representation of *V. + We modify *V to contain the original value arithmetically shifted right by + the number of zeroes. 
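A standalone sketch, not taken from the GCC sources above, of the length selection in asm_conditional_branch. As the source comment notes, the offset is measured from the start of the "if cc" insn, which is why the jump.s window is the asymmetric -4094..4096 rather than -4096..4094.

#include <stdio.h>

/* Pick the column of ccbranch_templates to use for a branch whose
   destination is OFFSET bytes away.  */
static int
cbranch_template_len (int offset)
{
  if (offset >= -1024 && offset <= 1022)
    return 0;   /* short conditional branch */
  if (offset >= -4094 && offset <= 4096)
    return 1;   /* reversed condition plus jump.s */
  return 2;     /* reversed condition plus jump.l */
}

int
main (void)
{
  printf ("%d %d %d\n",
          cbranch_template_len (100),      /* 0 */
          cbranch_template_len (2000),     /* 1 */
          cbranch_template_len (100000));  /* 2 */
  return 0;
}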
*/ + +static int +shiftr_zero (HOST_WIDE_INT *v) +{ + unsigned HOST_WIDE_INT tmp = *v; + unsigned HOST_WIDE_INT sgn; + int n = 0; + + if (tmp == 0) + return 0; + + sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1)); + while ((tmp & 0x1) == 0 && n <= 32) + { + tmp = (tmp >> 1) | sgn; + n++; + } + *v = tmp; + return n; +} + +/* After reload, split the load of an immediate constant. OPERANDS are the + operands of the movsi_insn pattern which we are splitting. We return + nonzero if we emitted a sequence to load the constant, zero if we emitted + nothing because we want to use the splitter's default sequence. */ + +int +split_load_immediate (rtx operands[]) +{ + HOST_WIDE_INT val = INTVAL (operands[1]); + HOST_WIDE_INT tmp; + HOST_WIDE_INT shifted = val; + HOST_WIDE_INT shifted_compl = ~val; + int num_zero = shiftr_zero (&shifted); + int num_compl_zero = shiftr_zero (&shifted_compl); + unsigned int regno = REGNO (operands[0]); + + /* This case takes care of single-bit set/clear constants, which we could + also implement with BITSET/BITCLR. */ + if (num_zero + && shifted >= -32768 && shifted < 65536 + && (D_REGNO_P (regno) + || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2))) + { + emit_insn (gen_movsi (operands[0], GEN_INT (shifted))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero))); + return 1; + } + + tmp = val & 0xFFFF; + tmp |= -(tmp & 0x8000); + + /* If high word has one bit set or clear, try to use a bit operation. */ + if (D_REGNO_P (regno)) + { + if (log2constp (val & 0xFFFF0000)) + { + emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF))); + emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000))); + return 1; + } + else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF))); + } + } + + if (D_REGNO_P (regno)) + { + if (tmp >= -64 && tmp <= 63) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536))); + return 1; + } + + if ((val & 0xFFFF0000) == 0) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + + if ((val & 0xFFFF0000) == 0xFFFF0000) + { + emit_insn (gen_movsi (operands[0], constm1_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + } + + /* Need DREGs for the remaining case. */ + if (regno > REG_R7) + return 0; + + if (optimize_size + && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63) + { + /* If optimizing for size, generate a sequence that has more instructions + but is shorter. */ + emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], + GEN_INT (num_compl_zero))); + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + return 1; + } + return 0; +} + +/* Return true if the legitimate memory address for a memory operand of mode + MODE. Return false if not. */ + +static bool +bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value) +{ + unsigned HOST_WIDE_INT v = value > 0 ? value : -value; + int sz = GET_MODE_SIZE (mode); + int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2; + /* The usual offsettable_memref machinery doesn't work so well for this + port, so we deal with the problem here. 
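A standalone sketch, not taken from the GCC sources above, of the idea behind shiftr_zero and split_load_immediate: strip the trailing zero bits of a constant, and if what remains fits the 16-bit load-immediate range, materialize the value as a small load followed by a left shift. Sign handling is simplified to unsigned arithmetic here, unlike the sign-preserving arithmetic shift the patch uses.

#include <stdint.h>
#include <stdio.h>

/* Count trailing zero bits of V and return the stripped value through
   STRIPPED; zero is returned unchanged, as in shiftr_zero.  */
static int
strip_trailing_zeros (uint32_t v, uint32_t *stripped)
{
  int n = 0;

  if (v != 0)
    while ((v & 1) == 0)
      {
        v >>= 1;
        n++;
      }
  *stripped = v;
  return n;
}

int
main (void)
{
  uint32_t stripped;
  int n = strip_trailing_zeros (0x00030000u, &stripped);

  /* 0x30000 == 3 << 16: load the immediate 3, then shift left by 16.  */
  printf ("load %u, shift left by %d\n", (unsigned) stripped, n);
  return 0;
}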
*/ + if (value > 0 && sz == 8) + v += 4; + return (v & ~(0x7fff << shift)) == 0; +} + +static bool +bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode, + enum rtx_code outer_code) +{ + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH); +} + +/* Recognize an RTL expression that is a valid memory address for an + instruction. The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + Blackfin addressing modes are as follows: + + [preg] + [preg + imm16] + + B [ Preg + uimm15 ] + W [ Preg + uimm16m2 ] + [ Preg + uimm17m4 ] + + [preg++] + [preg--] + [--sp] +*/ + +static bool +bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + switch (GET_CODE (x)) { + case REG: + if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM)) + return true; + break; + case PLUS: + if (REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS) + && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode) + || (GET_CODE (XEXP (x, 1)) == CONST_INT + && bfin_valid_add (mode, INTVAL (XEXP (x, 1)))))) + return true; + break; + case POST_INC: + case POST_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC)) + return true; + case PRE_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && XEXP (x, 0) == stack_pointer_rtx + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC)) + return true; + break; + default: + break; + } + return false; +} + +/* Decide whether we can force certain constants to memory. If we + decide we can't, the caller should be able to cope with it in + another way. */ + +static bool +bfin_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + /* We have only one class of non-legitimate constants, and our movsi + expander knows how to handle them. Dropping these constants into the + data section would only shift the problem - we'd still get relocs + outside the object, in the data section rather than the text section. */ + return true; +} + +/* Ensure that for any constant of the form symbol + offset, the offset + remains within the object. Any other constants are ok. + This ensures that flat binaries never have to deal with relocations + crossing section boundaries. */ + +static bool +bfin_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx sym; + HOST_WIDE_INT offset; + + if (GET_CODE (x) != CONST) + return true; + + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + + sym = XEXP (x, 0); + x = XEXP (x, 1); + if (GET_CODE (sym) != SYMBOL_REF + || GET_CODE (x) != CONST_INT) + return true; + offset = INTVAL (x); + + if (SYMBOL_REF_DECL (sym) == 0) + return true; + if (offset < 0 + || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym)))) + return false; + + return true; +} + +static bool +bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, + bool speed) +{ + enum rtx_code code = (enum rtx_code) code_i; + enum rtx_code outer_code = (enum rtx_code) outer_code_i; + int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; + + switch (code) + { + case CONST_INT: + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_Ks7 (x) ? 0 : cost2; + else if (outer_code == AND) + *total = log2constp (~INTVAL (x)) ? 
0 : cost2; + else if (outer_code == LE || outer_code == LT || outer_code == EQ) + *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2; + else if (outer_code == LEU || outer_code == LTU) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2; + else if (outer_code == MULT) + *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2; + else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2)) + *total = 0; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2; + else if (outer_code == IOR || outer_code == XOR) + *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2; + else + *total = cost2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case CONST_DOUBLE: + *total = COSTS_N_INSNS (2); + return true; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == SImode) + { + if (GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (op0, 1)); + if (val == 2 || val == 4) + { + *total = cost2; + *total += rtx_cost (XEXP (op0, 0), outer_code, opno, speed); + *total += rtx_cost (op1, outer_code, opno, speed); + return true; + } + } + *total = cost2; + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += set_src_cost (op0, speed); +#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer + towards creating too many induction variables. */ + if (!reg_or_7bit_operand (op1, SImode)) + *total += set_src_cost (op1, speed); +#endif + } + else if (GET_MODE (x) == DImode) + { + *total = 6 * cost2; + if (GET_CODE (op1) != CONST_INT + || !satisfies_constraint_Ks7 (op1)) + *total += rtx_cost (op1, PLUS, 1, speed); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, PLUS, 0, speed); + } + return true; + + case MINUS: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, 0, speed); + + return true; + + case IOR: + case AND: + case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */ + if (code == IOR) + { + if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) + || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT)) + { + *total = cost2; + return true; + } + } + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, 0, speed); + + if (GET_MODE (x) == DImode) + { + *total = 2 * cost2; + return true; + } + *total = cost2; + if (GET_MODE (x) != SImode) + return true; + + if (code == AND) + { + if (! rhs_andsi3_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + } + else + { + if (! 
regorlog2_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + } + + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + if (outer_code == SET + && XEXP (x, 1) == const1_rtx + && GET_CODE (XEXP (x, 2)) == CONST_INT) + { + *total = 2 * cost2; + return true; + } + /* fall through */ + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = cost2; + return true; + + case MULT: + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, 0, speed); + if (GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, 1, speed); + } + return true; + + case UDIV: + case UMOD: + *total = COSTS_N_INSNS (32); + return true; + + case VEC_CONCAT: + case VEC_SELECT: + if (outer_code == SET) + *total = cost2; + return true; + + default: + return false; + } +} + +/* Used for communication between {push,pop}_multiple_operation (which + we use not only as a predicate) and the corresponding output functions. */ +static int first_preg_to_save, first_dreg_to_save; +static int n_regs_to_save; + +int +analyze_push_multiple_operation (rtx op) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + first_preg_to_save = lastpreg; + first_dreg_to_save = lastdreg; + for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (dest) != MEM || ! REG_P (src)) + return 0; + dest = XEXP (dest, 0); + if (GET_CODE (dest) != PLUS + || ! REG_P (XEXP (dest, 0)) + || REGNO (XEXP (dest, 0)) != REG_SP + || GET_CODE (XEXP (dest, 1)) != CONST_INT + || INTVAL (XEXP (dest, 1)) != -i * 4) + return 0; + + regno = REGNO (src); + if (group == 0) + { + if (D_REGNO_P (regno)) + { + group = 1; + first_dreg_to_save = lastdreg = regno - REG_R0; + } + else if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else + return 0; + + continue; + } + + if (group == 1) + { + if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else if (regno != REG_R0 + lastdreg + 1) + return 0; + else + lastdreg++; + } + else if (group == 2) + { + if (regno != REG_P0 + lastpreg + 1) + return 0; + lastpreg++; + } + } + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +int +analyze_pop_multiple_operation (rtx op) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + for (i = 1, group = 0; i < XVECLEN (op, 0); i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (src) != MEM || ! REG_P (dest)) + return 0; + src = XEXP (src, 0); + + if (i == 1) + { + if (! REG_P (src) || REGNO (src) != REG_SP) + return 0; + } + else if (GET_CODE (src) != PLUS + || ! 
REG_P (XEXP (src, 0)) + || REGNO (XEXP (src, 0)) != REG_SP + || GET_CODE (XEXP (src, 1)) != CONST_INT + || INTVAL (XEXP (src, 1)) != (i - 1) * 4) + return 0; + + regno = REGNO (dest); + if (group == 0) + { + if (regno == REG_R7) + { + group = 1; + lastdreg = 7; + } + else if (regno != REG_P0 + lastpreg - 1) + return 0; + else + lastpreg--; + } + else if (group == 1) + { + if (regno != REG_R0 + lastdreg - 1) + return 0; + else + lastdreg--; + } + } + first_dreg_to_save = lastdreg; + first_preg_to_save = lastpreg; + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +/* Emit assembly code for one multi-register push described by INSN, with + operands in OPERANDS. */ + +void +output_push_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = analyze_push_multiple_operation (PATTERN (insn)); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save); + else + sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Emit assembly code for one multi-register pop described by INSN, with + operands in OPERANDS. */ + +void +output_pop_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = analyze_pop_multiple_operation (PATTERN (insn)); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save); + else + sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ + +static void +single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset) +{ + rtx scratch = gen_reg_rtx (mode); + rtx srcmem, dstmem; + + srcmem = adjust_address_nv (src, mode, offset); + dstmem = adjust_address_nv (dst, mode, offset); + emit_move_insn (scratch, srcmem); + emit_move_insn (dstmem, scratch); +} + +/* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with + alignment ALIGN_EXP. Return true if successful, false if we should fall + back on a different method. */ + +bool +bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) +{ + rtx srcreg, destreg, countreg; + HOST_WIDE_INT align = 0; + unsigned HOST_WIDE_INT count = 0; + + if (GET_CODE (align_exp) == CONST_INT) + align = INTVAL (align_exp); + if (GET_CODE (count_exp) == CONST_INT) + { + count = INTVAL (count_exp); +#if 0 + if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) + return false; +#endif + } + + /* If optimizing for size, only do single copies inline. 
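A standalone sketch, not taken from the GCC sources above, of the register-range syntax emitted by output_push_multiple: the saved D registers always run from r7 down to first_dreg_to_save and the P registers from p5 down to first_preg_to_save, with the values 8 and 6 respectively meaning that group is empty.

#include <stddef.h>
#include <stdio.h>

/* Format a Blackfin multi-register push, mirroring the three cases in
   output_push_multiple.  */
static void
format_push (char *buf, size_t len, int first_dreg, int first_preg)
{
  if (first_dreg == 8)
    snprintf (buf, len, "[--sp] = ( p5:%d );", first_preg);
  else if (first_preg == 6)
    snprintf (buf, len, "[--sp] = ( r7:%d );", first_dreg);
  else
    snprintf (buf, len, "[--sp] = ( r7:%d, p5:%d );", first_dreg, first_preg);
}

int
main (void)
{
  char buf[64];

  format_push (buf, sizeof buf, 4, 3);
  puts (buf);   /* [--sp] = ( r7:4, p5:3 ); */
  return 0;
}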
*/ + if (optimize_size) + { + if (count == 2 && align < 2) + return false; + if (count == 4 && align < 4) + return false; + if (count != 1 && count != 2 && count != 4) + return false; + } + if (align < 2 && count != 1) + return false; + + destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + if (destreg != XEXP (dst, 0)) + dst = replace_equiv_address_nv (dst, destreg); + srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); + if (srcreg != XEXP (src, 0)) + src = replace_equiv_address_nv (src, srcreg); + + if (count != 0 && align >= 2) + { + unsigned HOST_WIDE_INT offset = 0; + + if (align >= 4) + { + if ((count & ~3) == 4) + { + single_move_for_movmem (dst, src, SImode, offset); + offset = 4; + } + else if (count & ~3) + { + HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + if (count & 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset += 2; + } + } + else + { + if ((count & ~1) == 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset = 2; + } + else if (count & ~1) + { + HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + } + if (count & 1) + { + single_move_for_movmem (dst, src, QImode, offset); + } + return true; + } + return false; +} + +/* Compute the alignment for a local variable. + TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. */ + +unsigned +bfin_local_alignment (tree type, unsigned align) +{ + /* Increasing alignment for (relatively) big types allows the builtin + memcpy can use 32 bit loads/stores. */ + if (TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32) + return 32; + return align; +} + +/* Implement TARGET_SCHED_ISSUE_RATE. */ + +static int +bfin_issue_rate (void) +{ + return 3; +} + +static int +bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + int dep_insn_code_number; + + /* Anti and output dependencies have zero cost. */ + if (REG_NOTE_KIND (link) != 0) + return 0; + + dep_insn_code_number = recog_memoized (dep_insn); + + /* If we can't recognize the insns, we can't really do anything. */ + if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD) + { + rtx pat = PATTERN (dep_insn); + rtx dest, src; + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_DEST (pat); + src = SET_SRC (pat); + if (! ADDRESS_REGNO_P (REGNO (dest)) + || ! (MEM_P (src) || D_REGNO_P (REGNO (src)))) + return cost; + return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3); + } + + return cost; +} + +/* This function acts like NEXT_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. 
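A standalone sketch, not taken from the GCC sources above, of the chunking used by bfin_expand_movmem once the operands are known to be 4-byte aligned: whole words first (via a single move or the rep_movsi loop), then at most one halfword and one byte for the remainder. Lower alignment restricts the plan to halfword and byte moves.

#include <stdio.h>

/* Break an aligned copy of COUNT bytes into word, halfword and byte
   moves, as bfin_expand_movmem does when align >= 4.  */
static void
plan_aligned_copy (unsigned count)
{
  unsigned words = count / 4;
  unsigned rest  = count % 4;

  printf ("%u word move(s), %u halfword, %u byte\n",
          words, rest / 2, rest % 2);
}

int
main (void)
{
  plan_aligned_copy (23);   /* 5 word move(s), 1 halfword, 1 byte */
  return 0;
}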
*/ +static rtx +find_next_insn_start (rtx insn) +{ + if (GET_MODE (insn) == SImode) + { + while (GET_MODE (insn) != QImode) + insn = NEXT_INSN (insn); + } + return NEXT_INSN (insn); +} + +/* This function acts like PREV_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. */ +static rtx +find_prev_insn_start (rtx insn) +{ + insn = PREV_INSN (insn); + gcc_assert (GET_MODE (insn) != SImode); + if (GET_MODE (insn) == QImode) + { + while (GET_MODE (PREV_INSN (insn)) == SImode) + insn = PREV_INSN (insn); + } + return insn; +} + +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +bfin_can_use_doloop_p (double_int, double_int iterations_max, + unsigned int, bool) +{ + /* Due to limitations in the hardware (an initial loop count of 0 + does not loop 2^32 times) we must avoid to generate a hardware + loops when we cannot rule out this case. */ + if (!flag_unsafe_loop_optimizations + && (iterations_max.high != 0 + || iterations_max.low >= 0xFFFFFFFF)) + return false; + return true; +} + +/* Increment the counter for the number of loop instructions in the + current function. */ + +void +bfin_hardware_loop (void) +{ + cfun->machine->has_hardware_loops++; +} + +/* Maximum loop nesting depth. */ +#define MAX_LOOP_DEPTH 2 + +/* Maximum size of a loop. */ +#define MAX_LOOP_LENGTH 2042 + +/* Maximum distance of the LSETUP instruction from the loop start. */ +#define MAX_LSETUP_DISTANCE 30 + +/* Estimate the length of INSN conservatively. */ + +static int +length_for_loop (rtx insn) +{ + int length = 0; + if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 8; + else if (ENABLE_WA_SPECULATIVE_LOADS) + length = 6; + } + else if (LABEL_P (insn)) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 4; + } + + if (NONDEBUG_INSN_P (insn)) + length += get_attr_length (insn); + + return length; +} + +/* Optimize LOOP. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + basic_block bb; + rtx insn, last_insn; + rtx loop_init, start_label, end_label; + rtx iter_reg, scratchreg, scratch_init, scratch_init_insn; + rtx lc_reg, lt_reg, lb_reg; + rtx seq, seq_end; + int length; + bool clobber0, clobber1; + + if (loop->depth > MAX_LOOP_DEPTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + scratchreg = NULL_RTX; + scratch_init = iter_reg; + scratch_init_insn = NULL_RTX; + if (!PREG_P (iter_reg) && loop->incoming_src) + { + basic_block bb_in = loop->incoming_src; + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) + || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE + && call_used_regs[i])) + && !REGNO_REG_SET_P (df_get_live_out (bb_in), i)) + { + scratchreg = gen_rtx_REG (SImode, i); + break; + } + for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in); + insn = PREV_INSN (insn)) + { + rtx set; + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + set = single_set (insn); + if (set && rtx_equal_p (SET_DEST (set), iter_reg)) + { + if (CONSTANT_P (SET_SRC (set))) + { + scratch_init = SET_SRC (set); + scratch_init_insn = insn; + } + break; + } + else if (reg_mentioned_p (iter_reg, PATTERN (insn))) + break; + } + } + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, as required by + the LSETUP instruction. 
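A standalone sketch, not taken from the GCC sources above, of the three limits that hwloop_optimize enforces before committing to an LSETUP hardware loop; the constants are the ones defined in the patch.

#include <stdbool.h>
#include <stdio.h>

#define MAX_LOOP_DEPTH       2     /* nesting supported by LC0/LC1 */
#define MAX_LOOP_LENGTH      2042  /* maximum loop body length in bytes */
#define MAX_LSETUP_DISTANCE  30    /* LSETUP must sit close to the loop start */

/* True if a candidate loop passes the basic feasibility checks.  */
static bool
hwloop_feasible (int depth, int length, int lsetup_distance)
{
  return depth <= MAX_LOOP_DEPTH
         && length <= MAX_LOOP_LENGTH
         && lsetup_distance <= MAX_LSETUP_DISTANCE;
}

int
main (void)
{
  printf ("%d %d\n",
          hwloop_feasible (1, 100, 10),    /* 1 */
          hwloop_feasible (3, 100, 10));   /* 0: nested too deeply */
  return 0;
}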
*/ + length = 0; + insn = BB_END (loop->incoming_src); + /* If we have to insert the LSETUP before a jump, count that jump in the + length. */ + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (insn)); + insn = PREV_INSN (insn); + } + + for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup not before loop_start\n", + loop->loop_no); + return false; + } + + /* Account for the pop of a scratch register where necessary. */ + if (!PREG_P (iter_reg) && scratchreg == NULL_RTX + && ENABLE_WA_LOAD_LCREGS) + length += 2; + + if (length > MAX_LSETUP_DISTANCE) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no); + return false; + } + } + + /* Check if start_label appears before loop_end and calculate the + offset between them. We calculate the length of instructions + conservatively. */ + length = 0; + for (insn = loop->start_label; + insn && insn != loop->loop_end; + insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + loop->length = length; + if (loop->length > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + return false; + } + + clobber0 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT0)); + clobber1 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT1)); + if (clobber0 && clobber1) + { + if (dump_file) + fprintf (dump_file, ";; loop %d no loop reg available\n", + loop->loop_no); + return false; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - CSYNC + - SSYNC + - Returns (RTS, RTN, etc.) */ + + bb = loop->tail; + last_insn = find_prev_insn_start (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = find_prev_insn_start (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL_RTX; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + return false; + } + + if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + return false; + } + /* In all other cases, try to replace a bad last insn with a nop. 
*/ + else if (JUMP_P (last_insn) + || CALL_P (last_insn) + || get_attr_type (last_insn) == TYPE_SYNC + || get_attr_type (last_insn) == TYPE_CALL + || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI + || recog_memoized (last_insn) == CODE_FOR_return_internal + || GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0) + { + if (loop->length + 2 > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", + loop->loop_no); + + last_insn = emit_insn_after (gen_forced_nop (), last_insn); + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + end_label = gen_label_rtx (); + iter_reg = loop->iter_reg; + + if (loop->depth == 1 && !clobber1) + { + lc_reg = gen_rtx_REG (SImode, REG_LC1); + lb_reg = gen_rtx_REG (SImode, REG_LB1); + lt_reg = gen_rtx_REG (SImode, REG_LT1); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1); + } + else + { + lc_reg = gen_rtx_REG (SImode, REG_LC0); + lb_reg = gen_rtx_REG (SImode, REG_LB0); + lt_reg = gen_rtx_REG (SImode, REG_LT0); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0); + } + + loop->end_label = end_label; + + /* Create a sequence containing the loop setup. */ + start_sequence (); + + /* LSETUP only accepts P registers. If we have one, we can use it, + otherwise there are several ways of working around the problem. + If we're not affected by anomaly 312, we can load the LC register + from any iteration register, and use LSETUP without initialization. + If we've found a P scratch register that's not live here, we can + instead copy the iter_reg into that and use an initializing LSETUP. + If all else fails, push and pop P0 and use it as a scratch. */ + if (P_REGNO_P (REGNO (iter_reg))) + { + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, iter_reg); + seq_end = emit_insn (loop_init); + } + else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg)) + { + emit_insn (gen_movsi (lc_reg, iter_reg)); + loop_init = gen_lsetup_without_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg); + seq_end = emit_insn (loop_init); + } + else if (scratchreg != NULL_RTX) + { + emit_insn (gen_movsi (scratchreg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, scratchreg); + seq_end = emit_insn (loop_init); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + else + { + rtx p0reg = gen_rtx_REG (SImode, REG_P0); + rtx push = gen_frame_mem (SImode, + gen_rtx_PRE_DEC (SImode, stack_pointer_rtx)); + rtx pop = gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + emit_insn (gen_movsi (push, p0reg)); + emit_insn (gen_movsi (p0reg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, p0reg); + emit_insn (loop_init); + seq_end = emit_insn (gen_movsi (p0reg, pop)); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, loop_init); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + /* If the loop isn't entered at the top, also create a jump to the entry + point. 
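A standalone sketch, not taken from the GCC sources above, of how hwloop_optimize picks between the two hardware loop register sets: the transformation is abandoned when both LC0/LT0/LB0 and LC1/LT1/LB1 are clobbered inside the loop, an innermost loop takes set 1 when it is free, and set 0 is used in the remaining cases.

#include <stdio.h>

/* Return 1 or 0 for the loop register set to use, or -1 if no hardware
   loop can be generated, mirroring the clobber0/clobber1 logic.  */
static int
pick_loop_reg_set (int depth, int clobber0, int clobber1)
{
  if (clobber0 && clobber1)
    return -1;                            /* give up on the hardware loop */
  return (depth == 1 && !clobber1) ? 1 : 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
          pick_loop_reg_set (1, 0, 0),    /* 1 */
          pick_loop_reg_set (2, 0, 1),    /* 0 */
          pick_loop_reg_set (1, 1, 1));   /* -1 */
  return 0;
}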
*/ + if (!loop->incoming_src && loop->head != loop->incoming_dest) + { + rtx label = BB_HEAD (loop->incoming_dest); + /* If we're jumping to the final basic block in the loop, and there's + only one cheap instruction before the end (typically an increment of + an induction variable), we can just emit a copy here instead of a + jump. */ + if (loop->incoming_dest == loop->tail + && next_real_insn (label) == last_insn + && asm_noperands (last_insn) < 0 + && GET_CODE (PATTERN (last_insn)) == SET) + { + seq_end = emit_insn (copy_rtx (PATTERN (last_insn))); + } + else + { + emit_jump_insn (gen_jump (label)); + seq_end = emit_barrier (); + } + } + + seq = get_insns (); + end_sequence (); + + if (loop->incoming_src) + { + rtx prev = BB_END (loop->incoming_src); + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (prev)); + prev = PREV_INSN (prev); + } + emit_insn_after (seq, prev); + } + else + { + basic_block new_bb; + edge e; + edge_iterator ei; + +#ifdef ENABLE_CHECKING + if (loop->head != loop->incoming_dest) + { + /* We aren't entering the loop at the top. Since we've established + that the loop is entered only at one point, this means there + can't be fallthru edges into the head. Any such fallthru edges + would become invalid when we insert the new block, so verify + that this does not in fact happen. */ + FOR_EACH_EDGE (e, ei, loop->head->preds) + gcc_assert (!(e->flags & EDGE_FALLTHRU)); + } +#endif + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + e = make_edge (new_bb, loop->head, 0); + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the loop. */ + emit_label_before (loop->end_label, loop->last_insn); + + return true; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. */ +static void +hwloop_fail (hwloop_info loop) +{ + rtx insn = loop->loop_end; + + if (DPREG_P (loop->iter_reg)) + { + /* If loop->iter_reg is a DREG or PREG, we can split it here + without scratch register. */ + rtx insn, test; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); + } + else + { + splitting_loops = 1; + try_split (PATTERN (insn), insn, 1); + splitting_loops = 0; + } +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. 
*/ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +static struct hw_doloop_hooks bfin_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Blackfin + hardware loops are generated. */ + +static void +bfin_reorg_loops (void) +{ + reorg_loops (true, &bfin_doloop_hooks); +} + +/* Possibly generate a SEQUENCE out of three insns found in SLOT. + Returns true if we modified the insn chain, false otherwise. */ +static bool +gen_one_bundle (rtx slot[3]) +{ + gcc_assert (slot[1] != NULL_RTX); + + /* Don't add extra NOPs if optimizing for size. */ + if (optimize_size + && (slot[0] == NULL_RTX || slot[2] == NULL_RTX)) + return false; + + /* Verify that we really can do the multi-issue. */ + if (slot[0]) + { + rtx t = NEXT_INSN (slot[0]); + while (t != slot[1]) + { + if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + if (slot[2]) + { + rtx t = NEXT_INSN (slot[1]); + while (t != slot[2]) + { + if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + + if (slot[0] == NULL_RTX) + { + slot[0] = emit_insn_before (gen_mnop (), slot[1]); + df_insn_rescan (slot[0]); + } + if (slot[2] == NULL_RTX) + { + slot[2] = emit_insn_after (gen_forced_nop (), slot[1]); + df_insn_rescan (slot[2]); + } + + /* Avoid line number information being printed inside one bundle. */ + if (INSN_LOCATION (slot[1]) + && INSN_LOCATION (slot[1]) != INSN_LOCATION (slot[0])) + INSN_LOCATION (slot[1]) = INSN_LOCATION (slot[0]); + if (INSN_LOCATION (slot[2]) + && INSN_LOCATION (slot[2]) != INSN_LOCATION (slot[0])) + INSN_LOCATION (slot[2]) = INSN_LOCATION (slot[0]); + + /* Terminate them with "|| " instead of ";" in the output. */ + PUT_MODE (slot[0], SImode); + PUT_MODE (slot[1], SImode); + /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */ + PUT_MODE (slot[2], QImode); + return true; +} + +/* Go through all insns, and use the information generated during scheduling + to generate SEQUENCEs to represent bundles of instructions issued + simultaneously. */ + +static void +bfin_gen_bundles (void) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + rtx slot[3]; + int n_filled = 0; + + slot[0] = slot[1] = slot[2] = NULL_RTX; + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + rtx delete_this = NULL_RTX; + + if (NONDEBUG_INSN_P (insn)) + { + enum attr_type type = get_attr_type (insn); + + if (type == TYPE_STALL) + { + gcc_assert (n_filled == 0); + delete_this = insn; + } + else + { + if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM) + slot[0] = insn; + else if (slot[1] == NULL_RTX) + slot[1] = insn; + else + slot[2] = insn; + n_filled++; + } + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + /* BB_END can change due to emitting extra NOPs, so check here. 
*/ + at_end = insn == BB_END (bb); + if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode)) + { + if ((n_filled < 2 + || !gen_one_bundle (slot)) + && slot[0] != NULL_RTX) + { + rtx pat = PATTERN (slot[0]); + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT) + { + SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0); + INSN_CODE (slot[0]) = -1; + df_insn_rescan (slot[0]); + } + } + n_filled = 0; + slot[0] = slot[1] = slot[2] = NULL_RTX; + } + if (delete_this != NULL_RTX) + delete_insn (delete_this); + if (at_end) + break; + } + } +} + +/* Ensure that no var tracking notes are emitted in the middle of a + three-instruction bundle. */ + +static void +reorder_var_tracking_notes (void) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + rtx queue = NULL_RTX; + bool in_bundle = false; + + for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next) + { + next = NEXT_INSN (insn); + + if (INSN_P (insn)) + { + /* Emit queued up notes at the last instruction of a bundle. */ + if (GET_MODE (insn) == QImode) + { + while (queue) + { + rtx next_queue = PREV_INSN (queue); + PREV_INSN (NEXT_INSN (insn)) = queue; + NEXT_INSN (queue) = NEXT_INSN (insn); + NEXT_INSN (insn) = queue; + PREV_INSN (queue) = insn; + queue = next_queue; + } + in_bundle = false; + } + else if (GET_MODE (insn) == SImode) + in_bundle = true; + } + else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) + { + if (in_bundle) + { + rtx prev = PREV_INSN (insn); + PREV_INSN (next) = prev; + NEXT_INSN (prev) = next; + + PREV_INSN (insn) = queue; + queue = insn; + } + } + } + } +} + +/* On some silicon revisions, functions shorter than a certain number of cycles + can cause unpredictable behaviour. Work around this by adding NOPs as + needed. */ +static void +workaround_rts_anomaly (void) +{ + rtx insn, first_insn = NULL_RTX; + int cycles = 4; + + if (! ENABLE_WA_RETS) + return; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (BARRIER_P (insn)) + return; + + if (NOTE_P (insn) || LABEL_P (insn)) + continue; + + if (JUMP_TABLE_DATA_P (insn)) + continue; + + if (first_insn == NULL_RTX) + first_insn = insn; + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (CALL_P (insn)) + return; + + if (JUMP_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_return_internal) + break; + + /* Nothing to worry about for direct jumps. */ + if (!any_condjump_p (insn)) + return; + if (cycles <= 1) + return; + cycles--; + } + else if (INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int this_cycles = 1; + + if (GET_CODE (pat) == PARALLEL) + { + if (analyze_push_multiple_operation (pat) + || analyze_pop_multiple_operation (pat)) + this_cycles = n_regs_to_save; + } + else + { + int icode = recog_memoized (insn); + + if (icode == CODE_FOR_link) + this_cycles = 4; + else if (icode == CODE_FOR_unlink) + this_cycles = 3; + else if (icode == CODE_FOR_mulsi3) + this_cycles = 5; + } + if (this_cycles >= cycles) + return; + + cycles -= this_cycles; + } + } + while (cycles > 0) + { + emit_insn_before (gen_nop (), first_insn); + cycles--; + } +} + +/* Return an insn type for INSN that can be used by the caller for anomaly + workarounds. This differs from plain get_attr_type in that it handles + SEQUENCEs. 
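A standalone sketch, not taken from the GCC sources above, of the padding rule in workaround_rts_anomaly: the function must represent at least four cycles of work before the return, each instruction ahead of the RTS consumes its own cycle count (4 for link, 3 for unlink, 5 for a multiply, one per register for a multiple push or pop, 1 otherwise), and whatever budget remains is filled with NOPs at the top of the function. The special handling of conditional branches and calls is omitted here.

#include <stdio.h>

/* Given the cycle counts of the instructions that precede the return,
   compute how many NOPs still have to be inserted.  */
static int
rts_nops_needed (const int *insn_cycles, int n_insns)
{
  int budget = 4;   /* cycles the silicon needs before an RTS */

  for (int i = 0; i < n_insns && budget > 0; i++)
    budget -= insn_cycles[i];
  return budget > 0 ? budget : 0;
}

int
main (void)
{
  int cycles[] = { 1, 1 };   /* two ordinary single-cycle insns */

  printf ("%d\n", rts_nops_needed (cycles, 2));   /* 2 NOPs still needed */
  return 0;
}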
*/ + +static enum attr_type +type_for_anomaly (rtx insn) +{ + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SEQUENCE) + { + enum attr_type t; + t = get_attr_type (XVECEXP (pat, 0, 1)); + if (t == TYPE_MCLD) + return t; + t = get_attr_type (XVECEXP (pat, 0, 2)); + if (t == TYPE_MCLD) + return t; + return TYPE_MCST; + } + else + return get_attr_type (insn); +} + +/* Return true iff the address found in MEM is based on the register + NP_REG and optionally has a positive offset. */ +static bool +harmless_null_pointer_p (rtx mem, int np_reg) +{ + mem = XEXP (mem, 0); + if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC) + mem = XEXP (mem, 0); + if (REG_P (mem) && (int) REGNO (mem) == np_reg) + return true; + if (GET_CODE (mem) == PLUS + && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg) + { + mem = XEXP (mem, 1); + if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0) + return true; + } + return false; +} + +/* Return nonzero if INSN contains any loads that may trap. */ + +static bool +trapping_loads_p (rtx insn, int np_reg, bool after_np_branch) +{ + rtx mem = SET_SRC (single_set (insn)); + + if (!after_np_branch) + np_reg = -1; + return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg)) + && may_trap_p (mem)); +} + +/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of + a three-insn bundle, see if one of them is a load and return that if so. + Return NULL_RTX if the insn does not contain loads. */ +static rtx +find_load (rtx insn) +{ + if (!NONDEBUG_INSN_P (insn)) + return NULL_RTX; + if (get_attr_type (insn) == TYPE_MCLD) + return insn; + if (GET_MODE (insn) != SImode) + return NULL_RTX; + do { + insn = NEXT_INSN (insn); + if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode) + && get_attr_type (insn) == TYPE_MCLD) + return insn; + } while (GET_MODE (insn) != QImode); + return NULL_RTX; +} + +/* Determine whether PAT is an indirect call pattern. */ +static bool +indirect_call_p (rtx pat) +{ + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + gcc_assert (GET_CODE (pat) == CALL); + pat = XEXP (pat, 0); + gcc_assert (GET_CODE (pat) == MEM); + pat = XEXP (pat, 0); + + return REG_P (pat); +} + +/* During workaround_speculation, track whether we're in the shadow of a + conditional branch that tests a P register for NULL. If so, we can omit + emitting NOPs if we see a load from that P register, since a speculative + access at address 0 isn't a problem, and the load is executed in all other + cases anyway. + Global for communication with note_np_check_stores through note_stores. + */ +int np_check_regno = -1; +bool np_after_branch = false; + +/* Subroutine of workaround_speculation, called through note_stores. */ +static void +note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno)) + np_check_regno = -1; +} + +static void +workaround_speculation (void) +{ + rtx insn, next; + rtx last_condjump = NULL_RTX; + int cycles_since_jump = INT_MAX; + int delay_added = 0; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. 
*/ + for (insn = get_insns (); insn; insn = next) + { + rtx pat; + int delay_needed = 0; + + next = find_next_insn_start (insn); + + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + if (JUMP_TABLE_DATA_P (insn)) + continue; + + if (LABEL_P (insn)) + { + np_check_regno = -1; + continue; + } + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0) + { + np_check_regno = -1; + continue; + } + + if (JUMP_P (insn)) + { + /* Is this a condjump based on a null pointer comparison we saw + earlier? */ + if (np_check_regno != -1 + && recog_memoized (insn) == CODE_FOR_cbranchbi4) + { + rtx op = XEXP (SET_SRC (PATTERN (insn)), 0); + gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE); + if (GET_CODE (op) == NE) + np_after_branch = true; + } + if (any_condjump_p (insn) + && ! cbranch_predicted_taken_p (insn)) + { + last_condjump = insn; + delay_added = 0; + cycles_since_jump = 0; + } + else + cycles_since_jump = INT_MAX; + } + else if (CALL_P (insn)) + { + np_check_regno = -1; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS) + { + delay_needed = 3; + } + } + else if (NONDEBUG_INSN_P (insn)) + { + rtx load_insn = find_load (insn); + enum attr_type type = type_for_anomaly (insn); + + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + /* Detect a comparison of a P register with zero. If we later + see a condjump based on it, we have found a null pointer + check. */ + if (recog_memoized (insn) == CODE_FOR_compare_eq) + { + rtx src = SET_SRC (PATTERN (insn)); + if (REG_P (XEXP (src, 0)) + && P_REGNO_P (REGNO (XEXP (src, 0))) + && XEXP (src, 1) == const0_rtx) + { + np_check_regno = REGNO (XEXP (src, 0)); + np_after_branch = false; + } + else + np_check_regno = -1; + } + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, np_check_regno, + np_after_branch)) + delay_needed = 4; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 3; + + /* See if we need to forget about a null pointer comparison + we found earlier. */ + if (recog_memoized (insn) != CODE_FOR_compare_eq) + { + note_stores (PATTERN (insn), note_np_check_stores, NULL); + if (np_check_regno != -1) + { + if (find_regno_note (insn, REG_INC, np_check_regno)) + np_check_regno = -1; + } + } + + } + + if (delay_needed > cycles_since_jump + && (delay_needed - cycles_since_jump) > delay_added) + { + rtx pat1; + int num_clobbers; + rtx *op = recog_data.operand; + + delay_needed -= cycles_since_jump; + + extract_insn (last_condjump); + if (optimize_size) + { + pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2], + op[3]); + cycles_since_jump = INT_MAX; + } + else + { + /* Do not adjust cycles_since_jump in this case, so that + we'll increase the number of NOPs for a subsequent insn + if necessary. */ + pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3], + GEN_INT (delay_needed)); + delay_added = delay_needed; + } + PATTERN (last_condjump) = pat1; + INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers); + } + if (CALL_P (insn)) + { + cycles_since_jump = INT_MAX; + delay_added = 0; + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + int cycles_since_jump; + if (JUMP_P (insn) + && any_condjump_p (insn) + && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken + || cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx label = target; + rtx next_tgt; + + cycles_since_jump = 0; + for (; target && cycles_since_jump < 3; target = next_tgt) + { + rtx pat; + + next_tgt = find_next_insn_start (target); + + if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target)) + continue; + + if (JUMP_TABLE_DATA_P (target)) + continue; + + pat = PATTERN (target); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (NONDEBUG_INSN_P (target)) + { + rtx load_insn = find_load (target); + enum attr_type type = type_for_anomaly (target); + int delay_needed = 0; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, -1, false)) + delay_needed = 2; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 2; + + if (delay_needed > cycles_since_jump) + { + rtx prev = prev_real_insn (label); + delay_needed -= cycles_since_jump; + if (dump_file) + fprintf (dump_file, "Adding %d nops after %d\n", + delay_needed, INSN_UID (label)); + if (JUMP_P (prev) + && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops) + { + rtx x; + HOST_WIDE_INT v; + + if (dump_file) + fprintf (dump_file, + "Reducing nops on insn %d.\n", + INSN_UID (prev)); + x = PATTERN (prev); + x = XVECEXP (x, 0, 1); + v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed; + XVECEXP (x, 0, 0) = GEN_INT (v); + } + while (delay_needed-- > 0) + emit_insn_after (gen_nop (), label); + break; + } + } + } + } + } +} + +/* Called just before the final scheduling pass. If we need to insert NOPs + later on to work around speculative loads, insert special placeholder + insns that cause loads to be delayed for as many cycles as necessary + (and possible). This reduces the number of NOPs we need to add. + The dummy insns we generate are later removed by bfin_gen_bundles. */ +static void +add_sched_insns_for_speculation (void) +{ + rtx insn; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) + continue; + if (JUMP_TABLE_DATA_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (JUMP_P (insn)) + { + if (any_condjump_p (insn) + && !cbranch_predicted_taken_p (insn)) + { + rtx n = next_real_insn (insn); + emit_insn_before (gen_stall (GEN_INT (3)), n); + } + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) + && any_condjump_p (insn) + && (cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx next = next_real_insn (target); + + if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE + && get_attr_type (next) == TYPE_STALL) + continue; + emit_insn_before (gen_stall (GEN_INT (1)), next); + } + } +} + +/* We use the machine specific reorg pass for emitting CSYNC instructions + after conditional branches as needed. + + The Blackfin is unusual in that a code sequence like + if cc jump label + r0 = (p0) + may speculatively perform the load even if the condition isn't true. This + happens for a branch that is predicted not taken, because the pipeline + isn't flushed or stalled, so the early stages of the following instructions, + which perform the memory reference, are allowed to execute before the + jump condition is evaluated. + Therefore, we must insert additional instructions in all places where this + could lead to incorrect behavior. The manual recommends CSYNC, while + VDSP seems to use NOPs (even though its corresponding compiler option is + named CSYNC). + + When optimizing for speed, we emit NOPs, which seems faster than a CSYNC. + When optimizing for size, we turn the branch into a predicted taken one. + This may be slower due to mispredicts, but saves code size. */ + +static void +bfin_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + if (flag_schedule_insns_after_reload) + { + splitting_for_sched = 1; + split_all_insns (); + splitting_for_sched = 0; + + add_sched_insns_for_speculation (); + + timevar_push (TV_SCHED2); + if (flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()) + run_selective_scheduling (); + else + schedule_insns (); + timevar_pop (TV_SCHED2); + + /* Examine the schedule and insert nops as necessary for 64-bit parallel + instructions. */ + bfin_gen_bundles (); + } + + df_analyze (); + + /* Doloop optimization */ + if (cfun->machine->has_hardware_loops) + bfin_reorg_loops (); + + workaround_speculation (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + reorder_var_tracking_notes (); + timevar_pop (TV_VAR_TRACKING); + } + + df_finish_pass (false); + + workaround_rts_anomaly (); +} + +/* Handle interrupt_handler, exception_handler and nmi_handler function + attributes; arguments as in struct attribute_spec.handler. */ + +static tree +handle_int_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree x = *node; + if (TREE_CODE (x) == FUNCTION_DECL) + x = TREE_TYPE (x); + + if (TREE_CODE (x) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (funkind (x) != SUBROUTINE) + error ("multiple function type attributes specified"); + + return NULL_TREE; +} + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). 
*/ + +static int +bfin_comp_type_attributes (const_tree type1, const_tree type2) +{ + e_funkind kind1, kind2; + + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + kind1 = funkind (type1); + kind2 = funkind (type2); + + if (kind1 != kind2) + return 0; + + /* Check for mismatched modifiers */ + if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2))) + return 0; + + return 1; +} + +/* Handle a "longcall" or "shortcall" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_longcall_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0 + && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node))) + || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0 + && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node)))) + { + warning (OPT_Wattributes, + "can%'t apply both longcall and shortcall attributes to the same function"); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "l1_text" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error ("%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + else if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l1.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l1.text"); + + return NULL_TREE; +} + +/* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute; + arguments as in struct attribute_spec.handler. 
*/ + +static tree +bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != VAR_DECL) + { + error ("%qE attribute only applies to variables", + name); + *no_add_attrs = true; + } + else if (current_function_decl != NULL_TREE + && !TREE_STATIC (decl)) + { + error ("%qE attribute cannot be specified for local variables", + name); + *no_add_attrs = true; + } + else + { + const char *section_name; + + if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0) + section_name = ".l1.data"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0) + section_name = ".l1.data.A"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0) + section_name = ".l1.data.B"; + else + gcc_unreachable (); + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + section_name) != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) + = build_string (strlen (section_name) + 1, section_name); + } + + return NULL_TREE; +} + +/* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */ + +static tree +bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name), + tree ARG_UNUSED (args), int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.text"); + } + else if (TREE_CODE (decl) == VAR_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.data") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.data"); + } + + return NULL_TREE; +} + +/* Table of valid machine attributes. 
*/ +static const struct attribute_spec bfin_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute, + false }, + { "exception_handler", 0, 0, false, true, true, handle_int_attribute, + false }, + { "nmi_handler", 0, 0, false, true, true, handle_int_attribute, false }, + { "nesting", 0, 0, false, true, true, NULL, false }, + { "kspisusp", 0, 0, false, true, true, NULL, false }, + { "saveall", 0, 0, false, true, true, NULL, false }, + { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute, + false }, + { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute, + false }, + { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute, + false }, + { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to + tell the assembler to generate pointers to function descriptors in + some cases. */ + +static bool +bfin_assemble_integer (rtx value, unsigned int size, int aligned_p) +{ + if (TARGET_FDPIC && size == UNITS_PER_WORD) + { + if (GET_CODE (value) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (value)) + { + fputs ("\t.picptr\tfuncdesc(", asm_out_file); + output_addr_const (asm_out_file, value); + fputs (")\n", asm_out_file); + return true; + } + if (!aligned_p) + { + /* We've set the unaligned SI op to NULL, so we always have to + handle the unaligned case here. */ + assemble_integer_with_op ("\t.4byte\t", value); + return true; + } + } + return default_assemble_integer (value, size, aligned_p); +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[3]; + /* The this parameter is passed as the first argument. */ + rtx this_rtx = gen_rtx_REG (Pmode, REG_R0); + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[1] = this_rtx; + if (delta >= -64 && delta <= 63) + { + xops[0] = GEN_INT (delta); + output_asm_insn ("%1 += %0;", xops); + } + else if (delta >= -128 && delta < -64) + { + xops[0] = GEN_INT (delta + 64); + output_asm_insn ("%1 += -64; %1 += %0;", xops); + } + else if (delta > 63 && delta <= 126) + { + xops[0] = GEN_INT (delta - 63); + output_asm_insn ("%1 += 63; %1 += %0;", xops); + } + else + { + xops[0] = GEN_INT (delta); + output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops); + } + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx p2tmp = gen_rtx_REG (Pmode, REG_P2); + rtx tmp = gen_rtx_REG (Pmode, REG_R3); + + xops[1] = tmp; + xops[2] = p2tmp; + output_asm_insn ("%2 = r0; %2 = [%2];", xops); + + /* Adjust the this parameter. 
*/ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, p2tmp, + vcall_offset)); + if (!memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (Pmode, REG_P1); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops); + xops[0] = gen_rtx_MEM (Pmode, p2tmp); + } + xops[2] = this_rtx; + output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops); + } + + xops[0] = XEXP (DECL_RTL (function), 0); + if (1 || !flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jump.l\t%P0", xops); +} + +/* Codes for all the Blackfin builtins. */ +enum bfin_builtins +{ + BFIN_BUILTIN_CSYNC, + BFIN_BUILTIN_SSYNC, + BFIN_BUILTIN_ONES, + BFIN_BUILTIN_COMPOSE_2X16, + BFIN_BUILTIN_EXTRACTLO, + BFIN_BUILTIN_EXTRACTHI, + + BFIN_BUILTIN_SSADD_2X16, + BFIN_BUILTIN_SSSUB_2X16, + BFIN_BUILTIN_SSADDSUB_2X16, + BFIN_BUILTIN_SSSUBADD_2X16, + BFIN_BUILTIN_MULT_2X16, + BFIN_BUILTIN_MULTR_2X16, + BFIN_BUILTIN_NEG_2X16, + BFIN_BUILTIN_ABS_2X16, + BFIN_BUILTIN_MIN_2X16, + BFIN_BUILTIN_MAX_2X16, + + BFIN_BUILTIN_SSADD_1X16, + BFIN_BUILTIN_SSSUB_1X16, + BFIN_BUILTIN_MULT_1X16, + BFIN_BUILTIN_MULTR_1X16, + BFIN_BUILTIN_NORM_1X16, + BFIN_BUILTIN_NEG_1X16, + BFIN_BUILTIN_ABS_1X16, + BFIN_BUILTIN_MIN_1X16, + BFIN_BUILTIN_MAX_1X16, + + BFIN_BUILTIN_SUM_2X16, + BFIN_BUILTIN_DIFFHL_2X16, + BFIN_BUILTIN_DIFFLH_2X16, + + BFIN_BUILTIN_SSADD_1X32, + BFIN_BUILTIN_SSSUB_1X32, + BFIN_BUILTIN_NORM_1X32, + BFIN_BUILTIN_ROUND_1X32, + BFIN_BUILTIN_NEG_1X32, + BFIN_BUILTIN_ABS_1X32, + BFIN_BUILTIN_MIN_1X32, + BFIN_BUILTIN_MAX_1X32, + BFIN_BUILTIN_MULT_1X32, + BFIN_BUILTIN_MULT_1X32X32, + BFIN_BUILTIN_MULT_1X32X32NS, + + BFIN_BUILTIN_MULHISILL, + BFIN_BUILTIN_MULHISILH, + BFIN_BUILTIN_MULHISIHL, + BFIN_BUILTIN_MULHISIHH, + + BFIN_BUILTIN_LSHIFT_1X16, + BFIN_BUILTIN_LSHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X16, + BFIN_BUILTIN_SSASHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X32, + + BFIN_BUILTIN_CPLX_MUL_16, + BFIN_BUILTIN_CPLX_MAC_16, + BFIN_BUILTIN_CPLX_MSU_16, + + BFIN_BUILTIN_CPLX_MUL_16_S40, + BFIN_BUILTIN_CPLX_MAC_16_S40, + BFIN_BUILTIN_CPLX_MSU_16_S40, + + BFIN_BUILTIN_CPLX_SQU, + + BFIN_BUILTIN_LOADBYTES, + + BFIN_BUILTIN_MAX +}; + +#define def_builtin(NAME, TYPE, CODE) \ +do { \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ +} while (0) + +/* Set up all builtin functions for this target. 
*/ +static void +bfin_init_builtins (void) +{ + tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree short_ftype_short + = build_function_type_list (short_integer_type_node, short_integer_type_node, + NULL_TREE); + tree short_ftype_int_int + = build_function_type_list (short_integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree short_ftype_int + = build_function_type_list (short_integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_v2hi_v2hi + = build_function_type_list (integer_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2hi_ftype_int_int + = build_function_type_list (V2HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_int + = build_function_type_list (V2HI_type_node, V2HI_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_short_short + = build_function_type_list (integer_type_node, short_integer_type_node, + short_integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree short_ftype_v2hi + = build_function_type_list (short_integer_type_node, V2HI_type_node, + NULL_TREE); + tree int_ftype_pint + = build_function_type_list (integer_type_node, + build_pointer_type (integer_type_node), + NULL_TREE); + + /* Add the remaining MMX insns with somewhat more complicated types. 
*/ + def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC); + def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC); + + def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES); + + def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int, + BFIN_BUILTIN_COMPOSE_2X16); + def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTHI); + def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTLO); + + def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MIN_2X16); + def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MAX_2X16); + + def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADDSUB_2X16); + def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUBADD_2X16); + def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULT_2X16); + def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULTR_2X16); + def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_NEG_2X16); + def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_ABS_2X16); + + def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MIN_1X16); + def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MAX_1X16); + + def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSADD_1X16); + def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X16); + def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULT_1X16); + def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULTR_1X16); + def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short, + BFIN_BUILTIN_NEG_1X16); + def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short, + BFIN_BUILTIN_ABS_1X16); + def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int, + BFIN_BUILTIN_NORM_1X16); + + def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_SUM_2X16); + def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFHL_2X16); + def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFLH_2X16); + + def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILL); + def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHL); + def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILH); + def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHH); + + def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MIN_1X32); + def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MAX_1X32); + + def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSADD_1X32); + def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X32); + def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int, + BFIN_BUILTIN_NEG_1X32); + def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int, + BFIN_BUILTIN_ABS_1X32); + def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int, + BFIN_BUILTIN_NORM_1X32); + 
def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int, + BFIN_BUILTIN_ROUND_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short, + BFIN_BUILTIN_MULT_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32NS); + + /* Shifts. */ + def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X16); + def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_SSASHIFT_2X16); + def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_LSHIFT_1X16); + def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_LSHIFT_2X16); + def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X32); + + /* Complex numbers. */ + def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16); + def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16); + def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16); + def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16_S40); + def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16_S40); + def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16_S40); + def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi, + BFIN_BUILTIN_CPLX_SQU); + + /* "Unaligned" load. 
*/ + def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint, + BFIN_BUILTIN_LOADBYTES); + +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum bfin_builtins code; + int macflag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 }, + + { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 }, + { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 }, + { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 }, + { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 }, + { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 }, + + { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 }, + { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 }, + { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 }, + { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 }, + + { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 }, + { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 }, + { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 }, + { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 }, + + { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 }, + { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 }, + { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 }, + { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 }, + { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 }, + { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 }, + + { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE }, + + { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 }, + { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 }, + { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 }, + { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 } + +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 }, + + { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 }, + + { CODE_FOR_clrsbhi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 }, + { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 }, + { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 }, + + { CODE_FOR_clrsbsi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 }, + { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 }, + { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 }, + { 
CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 }, + + { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 }, + { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 }, + { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 }, + { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + int macflag) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode) + { + op1mode = HImode; + op1 = gen_lowpart (HImode, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (macflag == -1) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag)); + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Subroutine of bfin_expand_builtin to take care of unop insns. */ + +static rtx +bfin_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + size_t i; + enum insn_code icode; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, arg2; + rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg; + enum machine_mode tmode, mode0; + + switch (fcode) + { + case BFIN_BUILTIN_CSYNC: + emit_insn (gen_csync ()); + return 0; + case BFIN_BUILTIN_SSYNC: + emit_insn (gen_ssync ()); + return 0; + + case BFIN_BUILTIN_DIFFHL_2X16: + case BFIN_BUILTIN_DIFFLH_2X16: + case BFIN_BUILTIN_SUM_2X16: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3 + : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3 + : CODE_FOR_ssaddhilov2hi3); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case BFIN_BUILTIN_MULT_1X32X32: + case BFIN_BUILTIN_MULT_1X32X32NS: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + if (! target + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + if (! register_operand (op0, SImode)) + op0 = copy_to_mode_reg (SImode, op0); + if (! register_operand (op1, SImode)) + op1 = copy_to_mode_reg (SImode, op1); + + a1reg = gen_rtx_REG (PDImode, REG_A1); + a0reg = gen_rtx_REG (PDImode, REG_A0); + tmp1 = gen_lowpart (V2HImode, op0); + tmp2 = gen_lowpart (V2HImode, op1); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, op0), + gen_lowpart (HImode, op1), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + + if (fcode == BFIN_BUILTIN_MULT_1X32X32) + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + else + { + /* For saturating multiplication, there's exactly one special case + to be handled: multiplying the smallest negative value with + itself. Due to shift correction in fractional multiplies, this + can overflow. Iff this happens, OP2 will contain 1, which, when + added in 32 bits to the smallest negative, wraps to the largest + positive, which is the result we want. 
*/ + op2 = gen_reg_rtx (V2HImode); + emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx)); + emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC), + gen_lowpart (SImode, op2))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + op2 = gen_reg_rtx (SImode); + emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC))); + } + emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1, + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15))); + emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg)); + if (fcode == BFIN_BUILTIN_MULT_1X32X32NS) + emit_insn (gen_addsi3 (target, target, op2)); + return target; + + case BFIN_BUILTIN_CPLX_MUL_16: + case BFIN_BUILTIN_CPLX_MUL_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + + if (fcode == BFIN_BUILTIN_CPLX_MUL_16) + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_NONE))); + emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, const1_rtx, const0_rtx, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_MAC_16: + case BFIN_BUILTIN_CPLX_MSU_16: + case BFIN_BUILTIN_CPLX_MAC_16_S40: + case BFIN_BUILTIN_CPLX_MSU_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + if (! 
register_operand (op2, GET_MODE (op2))) + op2 = copy_to_mode_reg (GET_MODE (op2), op2); + + tmp1 = gen_reg_rtx (SImode); + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16))); + emit_move_insn (tmp2, gen_lowpart (SImode, op0)); + emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx)); + emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2)); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MSU_16) + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_NONE))); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40) + { + tmp1 = const1_rtx; + tmp2 = const0_rtx; + } + else + { + tmp1 = const0_rtx; + tmp2 = const1_rtx; + } + emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, tmp1, tmp2, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_SQU: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_mulv2hi; + tmp1 = gen_reg_rtx (V2HImode); + tmp2 = gen_reg_rtx (V2HImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (V2HImode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + + emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0, + const0_rtx, const1_rtx, + GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx, + const0_rtx)); + emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1, + const0_rtx, const1_rtx)); + + return target; + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return bfin_expand_binop_builtin (d->icode, exp, target, + d->macflag); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return bfin_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); +} + +static void +bfin_conditional_register_usage (void) +{ + /* initialize condition code flag register rtx */ + bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC); + bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS); + if (TARGET_FDPIC) + call_used_regs[FDPIC_REGNO] = 1; + if (!TARGET_FDPIC && flag_pic) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } +} + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS bfin_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN bfin_expand_builtin + +#undef TARGET_ASM_GLOBALIZE_LABEL +#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START output_file_start + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE bfin_attribute_table + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS bfin_rtx_costs + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST 
bfin_address_cost + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST bfin_register_move_cost + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST bfin_memory_move_cost + +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER bfin_assemble_integer + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST bfin_adjust_cost + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG bfin_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance + +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE bfin_option_override + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD bfin_secondary_reload + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P bfin_legitimate_constant_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE bfin_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY bfin_extra_live_on_entry + +/* Passes after sched2 can break the helpful TImode annotations that + haifa-sched puts on every insn. Just do scheduling in reorg. */ +#undef TARGET_DELAY_SCHED2 +#define TARGET_DELAY_SCHED2 true + +/* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. 
*/ +#undef TARGET_DELAY_VARTRACK +#define TARGET_DELAY_VARTRACK true + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P bfin_can_use_doloop_p + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/bfin/bfin.h b/gcc-4.9/gcc/config/bfin/bfin.h new file mode 100644 index 000000000..d6f4c610d --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.h @@ -0,0 +1,1156 @@ +/* Definitions for the Blackfin port. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef _BFIN_CONFIG +#define _BFIN_CONFIG + +#ifndef BFIN_OPTS_H +#include "config/bfin/bfin-opts.h" +#endif + +#define OBJECT_FORMAT_ELF + +#define BRT 1 +#define BRF 0 + +/* Predefinition in the preprocessor for this target machine */ +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("bfin"); \ + builtin_define_std ("BFIN"); \ + builtin_define ("__ADSPBLACKFIN__"); \ + builtin_define ("__ADSPLPBLACKFIN__"); \ + \ + switch (bfin_cpu_type) \ + { \ + case BFIN_CPU_BF512: \ + builtin_define ("__ADSPBF512__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF514: \ + builtin_define ("__ADSPBF514__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF516: \ + builtin_define ("__ADSPBF516__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF518: \ + builtin_define ("__ADSPBF518__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF522: \ + builtin_define ("__ADSPBF522__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF523: \ + builtin_define ("__ADSPBF523__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF524: \ + builtin_define ("__ADSPBF524__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF525: \ + builtin_define ("__ADSPBF525__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF526: \ + builtin_define ("__ADSPBF526__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF527: \ + builtin_define ("__ADSPBF527__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF531: \ + builtin_define ("__ADSPBF531__"); \ + break; \ + case BFIN_CPU_BF532: \ + builtin_define ("__ADSPBF532__"); \ + break; \ + case BFIN_CPU_BF533: \ + builtin_define ("__ADSPBF533__"); \ + break; \ + case BFIN_CPU_BF534: \ + builtin_define ("__ADSPBF534__"); \ + break; \ + case BFIN_CPU_BF536: \ + builtin_define ("__ADSPBF536__"); \ + break; \ + case BFIN_CPU_BF537: \ + builtin_define ("__ADSPBF537__"); \ + break; \ + case BFIN_CPU_BF538: \ + builtin_define ("__ADSPBF538__"); \ + break; \ + case BFIN_CPU_BF539: \ + builtin_define ("__ADSPBF539__"); \ + break; \ + case BFIN_CPU_BF542M: \ + builtin_define ("__ADSPBF542M__"); \ + case BFIN_CPU_BF542: \ + builtin_define ("__ADSPBF542__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + 
case BFIN_CPU_BF544M: \ + builtin_define ("__ADSPBF544M__"); \ + case BFIN_CPU_BF544: \ + builtin_define ("__ADSPBF544__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF547M: \ + builtin_define ("__ADSPBF547M__"); \ + case BFIN_CPU_BF547: \ + builtin_define ("__ADSPBF547__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF548M: \ + builtin_define ("__ADSPBF548M__"); \ + case BFIN_CPU_BF548: \ + builtin_define ("__ADSPBF548__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF549M: \ + builtin_define ("__ADSPBF549M__"); \ + case BFIN_CPU_BF549: \ + builtin_define ("__ADSPBF549__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF561: \ + builtin_define ("__ADSPBF561__"); \ + break; \ + case BFIN_CPU_BF592: \ + builtin_define ("__ADSPBF592__"); \ + builtin_define ("__ADSPBF59x__"); \ + break; \ + } \ + \ + if (bfin_si_revision != -1) \ + { \ + /* space of 0xnnnn and a NUL */ \ + char *buf = XALLOCAVEC (char, 7); \ + \ + sprintf (buf, "0x%04x", bfin_si_revision); \ + builtin_define_with_value ("__SILICON_REVISION__", buf, 0); \ + } \ + \ + if (bfin_workarounds) \ + builtin_define ("__WORKAROUNDS_ENABLED"); \ + if (ENABLE_WA_SPECULATIVE_LOADS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_LOADS"); \ + if (ENABLE_WA_SPECULATIVE_SYNCS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_SYNCS"); \ + if (ENABLE_WA_INDIRECT_CALLS) \ + builtin_define ("__WORKAROUND_INDIRECT_CALLS"); \ + if (ENABLE_WA_RETS) \ + builtin_define ("__WORKAROUND_RETS"); \ + \ + if (TARGET_FDPIC) \ + { \ + builtin_define ("__BFIN_FDPIC__"); \ + builtin_define ("__FDPIC__"); \ + } \ + if (TARGET_ID_SHARED_LIBRARY \ + && !TARGET_SEP_DATA) \ + builtin_define ("__ID_SHARED_LIB__"); \ + if (flag_no_builtin) \ + builtin_define ("__NO_BUILTIN"); \ + if (TARGET_MULTICORE) \ + builtin_define ("__BFIN_MULTICORE"); \ + if (TARGET_COREA) \ + builtin_define ("__BFIN_COREA"); \ + if (TARGET_COREB) \ + builtin_define ("__BFIN_COREB"); \ + if (TARGET_SDRAM) \ + builtin_define ("__BFIN_SDRAM"); \ + } \ + while (0) +#endif + +#define DRIVER_SELF_SPECS SUBTARGET_DRIVER_SELF_SPECS "\ + %{mleaf-id-shared-library:%{!mid-shared-library:-mid-shared-library}} \ + %{mfdpic:%{!fpic:%{!fpie:%{!fPIC:%{!fPIE:\ + %{!fno-pic:%{!fno-pie:%{!fno-PIC:%{!fno-PIE:-fpie}}}}}}}}} \ +" +#ifndef SUBTARGET_DRIVER_SELF_SPECS +# define SUBTARGET_DRIVER_SELF_SPECS +#endif + +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \ +" + +#undef ASM_SPEC +#define ASM_SPEC "\ + %{mno-fdpic:-mnopic} %{mfdpic}" + +#define LINK_SPEC "\ +%{h*} %{v:-V} \ +%{mfdpic:-melf32bfinfd -z text} \ +%{static:-dn -Bstatic} \ +%{shared:-G -Bdynamic} \ +%{symbolic:-Bsymbolic} \ +-init __init -fini __fini " + +/* Generate DSP instructions, like DSP halfword loads */ +#define TARGET_DSP (1) + +#define TARGET_DEFAULT 0 + +/* Maximum number of library ids we permit */ +#define MAX_LIBRARY_ID 255 + +extern const char *bfin_library_id_string; + +#define FUNCTION_MODE SImode +#define Pmode SImode + +/* store-condition-codes instructions store 0 for false + This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. 
*/ +#define STACK_GROWS_DOWNWARD + +#define STACK_PUSH_CODE PRE_DEC + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* We define a dummy ARGP register; the parameters start at offset 0 from + it. */ +#define FIRST_PARM_OFFSET(DECL) 0 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM REG_P6 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM REG_P7 + +/* A dummy register that will be eliminated to either FP or SP. */ +#define ARG_POINTER_REGNUM REG_ARGP + +/* `PIC_OFFSET_TABLE_REGNUM' + The register number of the register used to address a table of + static data addresses in memory. In some cases this register is + defined by a processor's "application binary interface" (ABI). + When this macro is defined, RTL is generated for this register + once, as with the stack pointer and frame pointer registers. If + this macro is not defined, it is up to the machine-dependent files + to allocate such a register (if necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (REG_P5) + +#define FDPIC_FPTR_REGNO REG_P1 +#define FDPIC_REGNO REG_P3 +#define OUR_FDPIC_REG get_hard_reg_initial_val (SImode, FDPIC_REGNO) + +/* A static chain register for nested functions. We need to use a + call-clobbered register for this. */ +#define STATIC_CHAIN_REGNUM REG_P2 + +/* Define this if functions should assume that stack space has been + allocated for arguments even when their values are passed in + registers. + + The value of this macro is the size, in bytes, of the area reserved for + arguments passed in registers. + + This space can either be allocated by the caller or be a part of the + machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' + says which. */ +#define FIXED_STACK_AREA 12 +#define REG_PARM_STACK_SPACE(FNDECL) FIXED_STACK_AREA + +/* Define this if the above stack space is to be considered part of the + * space allocated by the caller. */ +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*#define DATA_ALIGNMENT(TYPE, BASIC-ALIGN) for arrays.. */ + +/* If defined, a C expression to compute the alignment for a local + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. */ + +#define LOCAL_ALIGNMENT(TYPE, ALIGN) bfin_local_alignment ((TYPE), (ALIGN)) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define TRAMPOLINE_SIZE (TARGET_FDPIC ? 
30 : 18) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + There are two registers that can always be eliminated on the i386. + The frame pointer and the arg pointer can be replaced by either the + hard frame pointer or to the stack pointer, depending upon the + circumstances. The hard frame pointer is not used before reload and + so it is not eligible for elimination. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = bfin_initial_elimination_offset ((FROM), (TO))) + +/* This processor has + 8 data register for doing arithmetic + 8 pointer register for doing addressing, including + 1 stack pointer P6 + 1 frame pointer P7 + 4 sets of indexing registers (I0-3, B0-3, L0-3, M0-3) + 1 condition code flag register CC + 5 return address registers RETS/I/X/N/E + 1 arithmetic status register (ASTAT). */ + +#define FIRST_PSEUDO_REGISTER 50 + +#define D_REGNO_P(X) ((X) <= REG_R7) +#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7) +#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3) +#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X)) +#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3) +#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X))) +#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X))) +#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X))) +#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X))) + +#define REGISTER_NAMES { \ + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \ + "P0", "P1", "P2", "P3", "P4", "P5", "SP", "FP", \ + "I0", "I1", "I2", "I3", "B0", "B1", "B2", "B3", \ + "L0", "L1", "L2", "L3", "M0", "M1", "M2", "M3", \ + "A0", "A1", \ + "CC", \ + "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \ + "ARGP", \ + "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \ +} + +#define SHORT_REGISTER_NAMES { \ + "R0.L", "R1.L", "R2.L", "R3.L", "R4.L", "R5.L", "R6.L", "R7.L", \ + "P0.L", "P1.L", "P2.L", "P3.L", "P4.L", "P5.L", "SP.L", "FP.L", \ + "I0.L", "I1.L", "I2.L", "I3.L", "B0.L", "B1.L", "B2.L", "B3.L", \ + "L0.L", "L1.L", "L2.L", "L3.L", "M0.L", "M1.L", "M2.L", "M3.L", } + +#define HIGH_REGISTER_NAMES { \ + "R0.H", "R1.H", "R2.H", "R3.H", "R4.H", "R5.H", "R6.H", "R7.H", \ + "P0.H", "P1.H", "P2.H", "P3.H", "P4.H", "P5.H", "SP.H", "FP.H", \ + "I0.H", "I1.H", "I2.H", "I3.H", "B0.H", "B1.H", "B2.H", "B3.H", \ + "L0.H", "L1.H", "L2.H", "L3.H", "M0.H", "M1.H", "M2.H", "M3.H", } + +#define DREGS_PAIR_NAMES { \ + "R1:0.p", 0, "R3:2.p", 0, "R5:4.p", 0, "R7:6.p", 0, } + +#define BYTE_REGISTER_NAMES { \ + "R0.B", "R1.B", "R2.B", "R3.B", "R4.B", "R5.B", "R6.B", "R7.B", } + + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. 
*/ + +#define FIXED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* Order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. List frame pointer + late and fixed registers last. Note that, in general, we prefer + registers listed in CALL_USED_REGISTERS, keeping the others + available for storage of persistent values. */ + +#define REG_ALLOC_ORDER \ +{ REG_R0, REG_R1, REG_R2, REG_R3, REG_R7, REG_R6, REG_R5, REG_R4, \ + REG_P2, REG_P1, REG_P0, REG_P5, REG_P4, REG_P3, REG_P6, REG_P7, \ + REG_A0, REG_A1, \ + REG_I0, REG_I1, REG_I2, REG_I3, REG_B0, REG_B1, REG_B2, REG_B3, \ + REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \ + REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE, \ + REG_ASTAT, REG_SEQSTAT, REG_USP, \ + REG_CC, REG_ARGP, \ + REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1 \ +} + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + + +enum reg_class +{ + NO_REGS, + IREGS, + BREGS, + LREGS, + MREGS, + CIRCREGS, /* Circular buffering registers, Ix, Bx, Lx together form. See Automatic Circular Buffering. */ + DAGREGS, + EVEN_AREGS, + ODD_AREGS, + AREGS, + CCREGS, + EVEN_DREGS, + ODD_DREGS, + D0REGS, + D1REGS, + D2REGS, + D3REGS, + D4REGS, + D5REGS, + D6REGS, + D7REGS, + DREGS, + P0REGS, + FDPIC_REGS, + FDPIC_FPTR_REGS, + PREGS_CLOBBERED, + PREGS, + IPREGS, + DPREGS, + MOST_REGS, + LT_REGS, + LC_REGS, + LB_REGS, + PROLOGUE_REGS, + NON_A_CC_REGS, + ALL_REGS, LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int)LIM_REG_CLASSES) + +#define GENERAL_REGS DPREGS + +/* Give names of register classes as strings for dump file. 
*/ + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "IREGS", \ + "BREGS", \ + "LREGS", \ + "MREGS", \ + "CIRCREGS", \ + "DAGREGS", \ + "EVEN_AREGS", \ + "ODD_AREGS", \ + "AREGS", \ + "CCREGS", \ + "EVEN_DREGS", \ + "ODD_DREGS", \ + "D0REGS", \ + "D1REGS", \ + "D2REGS", \ + "D3REGS", \ + "D4REGS", \ + "D5REGS", \ + "D6REGS", \ + "D7REGS", \ + "DREGS", \ + "P0REGS", \ + "FDPIC_REGS", \ + "FDPIC_FPTR_REGS", \ + "PREGS_CLOBBERED", \ + "PREGS", \ + "IPREGS", \ + "DPREGS", \ + "MOST_REGS", \ + "LT_REGS", \ + "LC_REGS", \ + "LB_REGS", \ + "PROLOGUE_REGS", \ + "NON_A_CC_REGS", \ + "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ + +/* NOTE: DSP registers, IREGS - AREGS, are not GENERAL_REGS. We use + MOST_REGS as the union of DPREGS and DAGREGS. */ + +#define REG_CLASS_CONTENTS \ + /* 31 - 0 63-32 */ \ +{ { 0x00000000, 0 }, /* NO_REGS */ \ + { 0x000f0000, 0 }, /* IREGS */ \ + { 0x00f00000, 0 }, /* BREGS */ \ + { 0x0f000000, 0 }, /* LREGS */ \ + { 0xf0000000, 0 }, /* MREGS */ \ + { 0x0fff0000, 0 }, /* CIRCREGS */ \ + { 0xffff0000, 0 }, /* DAGREGS */ \ + { 0x00000000, 0x1 }, /* EVEN_AREGS */ \ + { 0x00000000, 0x2 }, /* ODD_AREGS */ \ + { 0x00000000, 0x3 }, /* AREGS */ \ + { 0x00000000, 0x4 }, /* CCREGS */ \ + { 0x00000055, 0 }, /* EVEN_DREGS */ \ + { 0x000000aa, 0 }, /* ODD_DREGS */ \ + { 0x00000001, 0 }, /* D0REGS */ \ + { 0x00000002, 0 }, /* D1REGS */ \ + { 0x00000004, 0 }, /* D2REGS */ \ + { 0x00000008, 0 }, /* D3REGS */ \ + { 0x00000010, 0 }, /* D4REGS */ \ + { 0x00000020, 0 }, /* D5REGS */ \ + { 0x00000040, 0 }, /* D6REGS */ \ + { 0x00000080, 0 }, /* D7REGS */ \ + { 0x000000ff, 0 }, /* DREGS */ \ + { 0x00000100, 0x000 }, /* P0REGS */ \ + { 0x00000800, 0x000 }, /* FDPIC_REGS */ \ + { 0x00000200, 0x000 }, /* FDPIC_FPTR_REGS */ \ + { 0x00004700, 0x800 }, /* PREGS_CLOBBERED */ \ + { 0x0000ff00, 0x800 }, /* PREGS */ \ + { 0x000fff00, 0x800 }, /* IPREGS */ \ + { 0x0000ffff, 0x800 }, /* DPREGS */ \ + { 0xffffffff, 0x800 }, /* MOST_REGS */\ + { 0x00000000, 0x3000 }, /* LT_REGS */\ + { 0x00000000, 0xc000 }, /* LC_REGS */\ + { 0x00000000, 0x30000 }, /* LB_REGS */\ + { 0x00000000, 0x3f7f8 }, /* PROLOGUE_REGS */\ + { 0xffffffff, 0x3fff8 }, /* NON_A_CC_REGS */\ + { 0xffffffff, 0x3ffff }} /* ALL_REGS */ + +#define IREG_POSSIBLE_P(OUTER) \ + ((OUTER) == POST_INC || (OUTER) == PRE_INC \ + || (OUTER) == POST_DEC || (OUTER) == PRE_DEC \ + || (OUTER) == MEM || (OUTER) == ADDRESS) + +#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OUTER, INDEX) \ + ((MODE) == HImode && IREG_POSSIBLE_P (OUTER) ? 
IPREGS : PREGS) + +#define INDEX_REG_CLASS PREGS + +#define REGNO_OK_FOR_BASE_STRICT_P(X, MODE, OUTER, INDEX) \ + (P_REGNO_P (X) || (X) == REG_ARGP \ + || (IREG_POSSIBLE_P (OUTER) && (MODE) == HImode \ + && I_REGNO_P (X))) + +#define REGNO_OK_FOR_BASE_NONSTRICT_P(X, MODE, OUTER, INDEX) \ + ((X) >= FIRST_PSEUDO_REGISTER \ + || REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX)) + +#ifdef REG_OK_STRICT +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, AS, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX) +#else +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, AS, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_NONSTRICT_P (X, MODE, OUTER, INDEX) +#endif + +#define REGNO_OK_FOR_INDEX_P(X) 0 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ +((REGNO) == REG_R0 ? D0REGS \ + : (REGNO) == REG_R1 ? D1REGS \ + : (REGNO) == REG_R2 ? D2REGS \ + : (REGNO) == REG_R3 ? D3REGS \ + : (REGNO) == REG_R4 ? D4REGS \ + : (REGNO) == REG_R5 ? D5REGS \ + : (REGNO) == REG_R6 ? D6REGS \ + : (REGNO) == REG_R7 ? D7REGS \ + : (REGNO) == REG_P0 ? P0REGS \ + : (REGNO) < REG_I0 ? PREGS \ + : (REGNO) == REG_ARGP ? PREGS \ + : (REGNO) >= REG_I0 && (REGNO) <= REG_I3 ? IREGS \ + : (REGNO) >= REG_L0 && (REGNO) <= REG_L3 ? LREGS \ + : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS \ + : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS \ + : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS \ + : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS \ + : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS \ + : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS \ + : (REGNO) == REG_CC ? CCREGS \ + : (REGNO) >= REG_RETS ? PROLOGUE_REGS \ + : NO_REGS) + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +/* Do not allow to store a value in REG_CC for any mode */ +/* Do not allow to store value in pregs if mode is not SI*/ +#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE)) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == V2PDImode && (CLASS) == AREGS ? 2 \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((MODE) == PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 1 \ + : (MODE) == V2PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 2 \ + : CLASS_MAX_NREGS (GENERAL_REGS, MODE)) + +/* A C expression that is nonzero if hard register TO can be + considered for use as a rename register for FROM register */ +#define HARD_REGNO_RENAME_OK(FROM, TO) bfin_hard_regno_rename_ok (FROM, TO) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. 
*/ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) \ + || ((GET_MODE_CLASS (MODE1) == MODE_INT \ + || GET_MODE_CLASS (MODE1) == MODE_FLOAT) \ + && (GET_MODE_CLASS (MODE2) == MODE_INT \ + || GET_MODE_CLASS (MODE2) == MODE_FLOAT) \ + && (MODE1) != BImode && (MODE2) != BImode \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)) + +/* `PREFERRED_RELOAD_CLASS (X, CLASS)' + A C expression that places additional restrictions on the register + class to use when it is necessary to copy value X into a register + in class CLASS. The value is a register class; perhaps CLASS, or + perhaps another, smaller class. */ +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + (GET_CODE (X) == POST_INC \ + || GET_CODE (X) == POST_DEC \ + || GET_CODE (X) == PRE_DEC ? PREGS : (CLASS)) + +/* Function Calling Conventions. */ + +/* The type of the current function; normal functions are of type + SUBROUTINE. */ +typedef enum { + SUBROUTINE, INTERRUPT_HANDLER, EXCPT_HANDLER, NMI_HANDLER +} e_funkind; +#define FUNCTION_RETURN_REGISTERS { REG_RETS, REG_RETI, REG_RETX, REG_RETN } + +#define FUNCTION_ARG_REGISTERS { REG_R0, REG_R1, REG_R2, -1 } + +/* Flags for the call/call_value rtl operations set up by function_arg */ +#define CALL_NORMAL 0x00000000 /* no special processing */ +#define CALL_LONG 0x00000001 /* always call indirect */ +#define CALL_SHORT 0x00000002 /* always call by symbol */ + +typedef struct { + int words; /* # words passed so far */ + int nregs; /* # registers available for passing */ + int *arg_regs; /* array of register -1 terminated */ + int call_cookie; /* Do special things for this call */ +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_REGNO_P(REGNO) function_arg_regno_p (REGNO) + + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT, N_NAMED_ARGS) \ + (init_cumulative_args (&CUM, FNTYPE, LIBNAME)) + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. +*/ + +#define VALUE_REGNO(MODE) (REG_R0) + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx_REG (TYPE_MODE (VALTYPE), \ + VALUE_REGNO(TYPE_MODE(VALTYPE))) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, VALUE_REGNO(MODE)) + +#define FUNCTION_VALUE_REGNO_P(N) ((N) == REG_R0) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Before the prologue, the return address is in the RETS register. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, REG_RETS) + +#define RETURN_ADDR_RTX(COUNT, FRAME) bfin_return_addr_rtx (COUNT) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (REG_RETS) + +/* Call instructions don't modify the stack pointer on the Blackfin. */ +#define INCOMING_FRAME_SP_OFFSET 0 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, REG_P2) +#define EH_RETURN_HANDLER_RTX \ + gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, \ + UNITS_PER_WORD)) + +/* Addressing Modes */ + +/* A number, the maximum number of registers that can appear in a + valid memory address. 
Note that it is up to you to specify a + value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' + would ever accept. */ +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMATE_MODE_FOR_AUTOINC_P(MODE) \ + (GET_MODE_SIZE (MODE) <= 4 || (MODE) == PDImode) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +/* `LEGITIMATE_PIC_OPERAND_P (X)' + A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. You can assume that X satisfies `CONSTANT_P', so you need + not check this. You can also assume FLAG_PIC is true, so you need + not check it either. You need not define this macro if all + constants (including `SYMBOL_REF') can be immediate operands when + generating position independent code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) ! SYMBOLIC_CONST (X) + +#define SYMBOLIC_CONST(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +#define NOTICE_UPDATE_CC(EXPR, INSN) 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX UNITS_PER_WORD + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. */ + +#define MOVE_RATIO(speed) 5 + +/* STORAGE LAYOUT: target machine storage layout + Define this macro as a C expression which is nonzero if accessing + less than a word of memory (i.e. a `char' or a `short') is no + faster than accessing a word of memory, i.e., if such access + require more than one instruction or if there is no difference in + cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by + finding the smallest containing object; when it is defined, a + fullword load will be used if alignment permits. Unless bytes + accesses are faster than word accesses, using word accesses is + preferable since it may eliminate subsequent memory access if + subsequent accesses occur to other fields in the same word of the + structure, but to different bytes. */ +#define SLOW_BYTE_ACCESS 0 +#define SLOW_SHORT_ACCESS 0 + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + We can't access bytes but if we could we would in the Big Endian order. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width in bits of a "word", which is the contents of a machine register. + Note that this is not necessarily the width of data type `int'; + if using 16-bit ints on a 68000, this would still be 32. + But on a machine with 16-bit registers, this would be 16. */ +#define BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Width in bits of a pointer. + See also the macro `Pmode1' defined below. */ +#define POINTER_SIZE 32 + +/* Allocation boundary (in *bits*) for storing pointers in memory. */ +#define POINTER_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. 
*/ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 32 + +/* Define this if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* (shell-command "rm c-decl.o stor-layout.o") + * never define PCC_BITFIELD_TYPE_MATTERS + * really cause some alignment problem + */ + +#define UNITS_PER_FLOAT ((FLOAT_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + +#define UNITS_PER_DOUBLE ((DOUBLE_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + + +/* what is the 'type' of size_t */ +#define SIZE_TYPE "long unsigned int" + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 +#define FLOAT_TYPE_SIZE BITS_PER_WORD +#define SHORT_TYPE_SIZE 16 +#define CHAR_TYPE_SIZE 8 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 + +/* Note: Fix this to depend on target switch. -- lev */ + +/* Note: Try to implement double and force long double. -- tonyko + * #define __DOUBLES_ARE_FLOATS__ + * #define DOUBLE_TYPE_SIZE FLOAT_TYPE_SIZE + * #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE + * #define DOUBLES_ARE_FLOATS 1 + */ + +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* `PROMOTE_MODE (M, UNSIGNEDP, TYPE)' + A macro to update M and UNSIGNEDP when an object whose type is + TYPE and which has the specified mode and signedness is to be + stored in a register. This macro is only called when TYPE is a + scalar type. + + On most RISC machines, which only have operations that operate on + a full register, define this macro to set M to `word_mode' if M is + an integer mode narrower than `BITS_PER_WORD'. In most cases, + only integer modes should be widened because wider-precision + floating-point operations are usually more expensive than their + narrower counterparts. + + For most machines, the macro definition does not change UNSIGNEDP. + However, some machines, have instructions that preferentially + handle either signed or unsigned quantities of certain modes. For + example, on the DEC Alpha, 32-bit loads from memory and 32-bit add + instructions sign-extend the result to 64 bits. On such machines, + set UNSIGNEDP according to which kind of extension is more + efficient. + + Do not define this macro if it would never modify M.*/ + +#define BFIN_PROMOTE_MODE_P(MODE) \ + (!TARGET_DSP && GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (BFIN_PROMOTE_MODE_P(MODE)) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 0; \ + (MODE) = SImode; \ + } + +/* Describing Relative Costs of Operations */ + +/* Do not put function addr into constant pool */ +#define NO_FUNCTION_CSE 1 + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. 
+#define WORD_REGISTER_OPERATIONS +*/ + +/* Evaluates to true if A and B are mac flags that can be used + together in a single multiply insn. That is the case if they are + both the same flag not involving M, or if one is a combination of + the other with M. */ +#define MACFLAGS_MATCH_P(A, B) \ + ((A) == (B) \ + || ((A) == MACFLAG_NONE && (B) == MACFLAG_M) \ + || ((A) == MACFLAG_M && (B) == MACFLAG_NONE) \ + || ((A) == MACFLAG_IS && (B) == MACFLAG_IS_M) \ + || ((A) == MACFLAG_IS_M && (B) == MACFLAG_IS)) + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define PRINT_OPERAND(FILE, RTX, CODE) print_operand (FILE, RTX, CODE) +#define PRINT_OPERAND_ADDRESS(FILE, RTX) print_address_operand (FILE, RTX) + +typedef enum sections { + CODE_DIR, + DATA_DIR, + LAST_SECT_NM +} SECT_ENUM_T; + +typedef enum directives { + LONG_CONST_DIR, + SHORT_CONST_DIR, + BYTE_CONST_DIR, + SPACE_DIR, + INIT_DIR, + LAST_DIR_NM +} DIR_ENUM_T; + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) \ + ((C) == ';' \ + || ((C) == '|' && (STR)[1] == '|')) + +#define TEXT_SECTION_ASM_OP ".text;" +#define DATA_SECTION_ASM_OP ".data;" + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" + +#define ASM_GLOBALIZE_LABEL1(FILE, NAME) \ + do { fputs (".global ", FILE); \ + assemble_name (FILE, NAME); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + } while (0) + +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + do { \ + fputs (".type ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (", STT_FUNC", FILE); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + } while (0) + +#define ASM_OUTPUT_LABEL(FILE, NAME) \ + do { assemble_name (FILE, NAME); \ + fputs (":\n",FILE); \ + } while (0) + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { fprintf (FILE, "_%s", NAME); \ + } while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) + +#define MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) \ + do { \ + char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputs (" - ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", REL); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \ + } while (0) + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + do { \ + asm_output_skip (FILE, SIZE); \ + } while (0) + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +do { \ + switch_to_section (data_section); \ + if ((SIZE) >= (unsigned int) 4 ) ASM_OUTPUT_ALIGN(FILE,2); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "%s %ld;\n", ASM_SPACE, \ + (ROUNDED) > (unsigned int) 1 ? 
(ROUNDED) : 1); \ +} while (0) + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ + do { \ + ASM_GLOBALIZE_LABEL1(FILE,NAME); \ + ASM_OUTPUT_LOCAL (FILE, NAME, SIZE, ROUNDED); } while(0) + +#define ASM_COMMENT_START "//" + +#define PROFILE_BEFORE_PROLOGUE +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + fprintf (FILE, "\t[--SP] = RETS;\n"); \ + if (TARGET_LONG_CALLS) \ + { \ + fprintf (FILE, "\tP2.h = __mcount;\n"); \ + fprintf (FILE, "\tP2.l = __mcount;\n"); \ + fprintf (FILE, "\tCALL (P2);\n"); \ + } \ + else \ + fprintf (FILE, "\tCALL __mcount;\n"); \ + fprintf (FILE, "\tRETS = [SP++];\n"); \ + } while(0) + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) fprintf (FILE, "\t[--SP] = %s;\n", reg_names[REGNO]) +#define ASM_OUTPUT_REG_POP(FILE, REGNO) fprintf (FILE, "\t%s = [SP++];\n", reg_names[REGNO]) + +extern rtx bfin_cc_rtx, bfin_rets_rtx; + +/* This works for GAS and some other assemblers. */ +#define SET_ASM_OP ".set " + +/* DBX register number for a given compiler register number */ +#define DBX_REGISTER_NUMBER(REGNO) (REGNO) + +#define SIZE_ASM_OP "\t.size\t" + +extern int splitting_for_sched, splitting_loops; + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) ((CHAR) == '!') + +#ifndef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 0 +#endif + +struct bfin_cpu +{ + const char *name; + bfin_cpu_t type; + int si_revision; + unsigned int workarounds; +}; + +extern const struct bfin_cpu bfin_cpus[]; + +#endif /* _BFIN_CONFIG */ diff --git a/gcc-4.9/gcc/config/bfin/bfin.md b/gcc-4.9/gcc/config/bfin/bfin.md new file mode 100644 index 000000000..f5e64d3ef --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.md @@ -0,0 +1,4202 @@ +;;- Machine description for Blackfin for GNU compiler +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; operand punctuation marks: +; +; X -- integer value printed as log2 +; Y -- integer value printed as log2(~value) - for bitclear +; h -- print half word register, low part +; d -- print half word register, high part +; D -- print operand as dregs pairs +; w -- print operand as accumulator register word (a0w, a1w) +; H -- high part of double mode operand +; T -- byte register representation Oct. 02 2001 + +; constant operand classes +; +; J 2**N 5bit imm scaled +; Ks7 -64 .. 63 signed 7bit imm +; Ku5 0..31 unsigned 5bit imm +; Ks4 -8 .. 7 signed 4bit imm +; Ks3 -4 .. 3 signed 3bit imm +; Ku3 0 .. 7 unsigned 3bit imm +; Pn 0, 1, 2 constants 0, 1 or 2, corresponding to n +; +; register operands +; d (r0..r7) +; a (p0..p5,fp,sp) +; e (a0, a1) +; b (i0..i3) +; f (m0..m3) +; v (b0..b3) +; c (i0..i3,m0..m3) CIRCREGS +; C (CC) CCREGS +; t (lt0,lt1) +; k (lc0,lc1) +; u (lb0,lb1) +; + +;; Define constants for hard registers. 
+ +(define_constants + [(REG_R0 0) + (REG_R1 1) + (REG_R2 2) + (REG_R3 3) + (REG_R4 4) + (REG_R5 5) + (REG_R6 6) + (REG_R7 7) + + (REG_P0 8) + (REG_P1 9) + (REG_P2 10) + (REG_P3 11) + (REG_P4 12) + (REG_P5 13) + (REG_P6 14) + (REG_P7 15) + + (REG_SP 14) + (REG_FP 15) + + (REG_I0 16) + (REG_I1 17) + (REG_I2 18) + (REG_I3 19) + + (REG_B0 20) + (REG_B1 21) + (REG_B2 22) + (REG_B3 23) + + (REG_L0 24) + (REG_L1 25) + (REG_L2 26) + (REG_L3 27) + + (REG_M0 28) + (REG_M1 29) + (REG_M2 30) + (REG_M3 31) + + (REG_A0 32) + (REG_A1 33) + + (REG_CC 34) + (REG_RETS 35) + (REG_RETI 36) + (REG_RETX 37) + (REG_RETN 38) + (REG_RETE 39) + + (REG_ASTAT 40) + (REG_SEQSTAT 41) + (REG_USP 42) + + (REG_ARGP 43) + + (REG_LT0 44) + (REG_LT1 45) + (REG_LC0 46) + (REG_LC1 47) + (REG_LB0 48) + (REG_LB1 49)]) + +;; Constants used in UNSPECs and UNSPEC_VOLATILEs. + +(define_constants + [(UNSPEC_CBRANCH_TAKEN 0) + (UNSPEC_CBRANCH_NOPS 1) + (UNSPEC_RETURN 2) + (UNSPEC_MOVE_PIC 3) + (UNSPEC_LIBRARY_OFFSET 4) + (UNSPEC_PUSH_MULTIPLE 5) + ;; Multiply or MAC with extra CONST_INT operand specifying the macflag + (UNSPEC_MUL_WITH_FLAG 6) + (UNSPEC_MAC_WITH_FLAG 7) + (UNSPEC_MOVE_FDPIC 8) + (UNSPEC_FUNCDESC_GOT17M4 9) + (UNSPEC_LSETUP_END 10) + ;; Distinguish a 32-bit version of an insn from a 16-bit version. + (UNSPEC_32BIT 11) + (UNSPEC_NOP 12) + (UNSPEC_ONES 13) + (UNSPEC_ATOMIC 14)]) + +(define_constants + [(UNSPEC_VOLATILE_CSYNC 1) + (UNSPEC_VOLATILE_SSYNC 2) + (UNSPEC_VOLATILE_LOAD_FUNCDESC 3) + (UNSPEC_VOLATILE_STORE_EH_HANDLER 4) + (UNSPEC_VOLATILE_DUMMY 5) + (UNSPEC_VOLATILE_STALL 6)]) + +(define_constants + [(MACFLAG_NONE 0) + (MACFLAG_T 1) + (MACFLAG_FU 2) + (MACFLAG_TFU 3) + (MACFLAG_IS 4) + (MACFLAG_IU 5) + (MACFLAG_W32 6) + (MACFLAG_M 7) + (MACFLAG_IS_M 8) + (MACFLAG_S2RND 9) + (MACFLAG_ISS2 10) + (MACFLAG_IH 11)]) + +(define_attr "type" + "move,movcc,mvi,mcld,mcst,dsp32,dsp32shiftimm,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall" + (const_string "misc")) + +(define_attr "addrtype" "32bit,preg,spreg,ireg" + (cond [(and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_i_address_operand" ""))) + (const_string "ireg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_i_address_operand" ""))) + (const_string "ireg")] + (const_string "32bit"))) + +(define_attr "storereg" "preg,other" + (cond [(and (eq_attr "type" "mcst") + (match_operand 1 "p_register_operand" "")) + (const_string "preg")] + (const_string "other"))) + +;; Scheduling definitions + +(define_automaton "bfin") + +(define_cpu_unit "slot0" "bfin") +(define_cpu_unit "slot1" "bfin") +(define_cpu_unit "slot2" "bfin") + +;; Three units used to enforce parallel issue restrictions: +;; only one of the 16-bit slots can use a P register in an address, +;; and only one them can be a store. 
+(define_cpu_unit "store" "bfin") +(define_cpu_unit "pregs" "bfin") + +;; A dummy unit used to delay scheduling of loads after a conditional +;; branch. +(define_cpu_unit "load" "bfin") + +;; A logical unit used to work around anomaly 05000074. +(define_cpu_unit "anomaly_05000074" "bfin") + +(define_reservation "core" "slot0+slot1+slot2") + +(define_insn_reservation "alu" 1 + (eq_attr "type" "move,movcc,mvi,alu0,shft,brcc,br,call,misc,sync,compare") + "core") + +(define_insn_reservation "imul" 3 + (eq_attr "type" "mult") + "core*3") + +(define_insn_reservation "dsp32" 1 + (eq_attr "type" "dsp32") + "slot0") + +(define_insn_reservation "dsp32shiftimm" 1 + (and (eq_attr "type" "dsp32shiftimm") + (not (match_test "ENABLE_WA_05000074"))) + "slot0") + +(define_insn_reservation "dsp32shiftimm_anomaly_05000074" 1 + (and (eq_attr "type" "dsp32shiftimm") + (match_test "ENABLE_WA_05000074")) + "slot0+anomaly_05000074") + +(define_insn_reservation "load32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit"))) + "core+load") + +(define_insn_reservation "loadp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg"))) + "slot1+pregs+load") + +(define_insn_reservation "loadsp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg"))) + "slot1+pregs") + +(define_insn_reservation "loadi" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg"))) + "(slot1|slot2)+load") + +(define_insn_reservation "store32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "32bit"))) + "core") + +(define_insn_reservation "storep" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (ior (not (match_test "ENABLE_WA_05000074")) + (eq_attr "storereg" "other"))) + "slot1+pregs+store") + +(define_insn_reservation "storep_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (and (match_test "ENABLE_WA_05000074") + (eq_attr "storereg" "preg"))) + "slot1+anomaly_05000074+pregs+store") + +(define_insn_reservation "storei" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (ior (not (match_test "ENABLE_WA_05000074")) + (eq_attr "storereg" "other"))) + "(slot1|slot2)+store") + +(define_insn_reservation "storei_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (and (match_test "ENABLE_WA_05000074") + (eq_attr "storereg" "preg"))) + "((slot1+anomaly_05000074)|slot2)+store") + +(define_insn_reservation "multi" 2 + (eq_attr "seq_insns" "multi") + "core") + +(define_insn_reservation "load_stall1" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const1_operand" "")) + "core+load*2") + +(define_insn_reservation "load_stall3" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const3_operand" "")) + "core+load*4") + +(absence_set "slot0" "slot1,slot2") +(absence_set "slot1" "slot2") + +;; Make sure genautomata knows about the maximum latency that can be produced +;; by the adjust_cost function. 
+(define_insn_reservation "dummy" 5 + (eq_attr "type" "dummy") + "core") + +;; Operand and operator predicates + +(include "predicates.md") +(include "constraints.md") + +;;; FRIO branches have been optimized for code density +;;; this comes at a slight cost of complexity when +;;; a compiler needs to generate branches in the general +;;; case. In order to generate the correct branching +;;; mechanisms the compiler needs keep track of instruction +;;; lengths. The follow table describes how to count instructions +;;; for the FRIO architecture. +;;; +;;; unconditional br are 12-bit imm pcrelative branches *2 +;;; conditional br are 10-bit imm pcrelative branches *2 +;;; brcc 10-bit: +;;; 1024 10-bit imm *2 is 2048 (-1024..1022) +;;; br 12-bit : +;;; 4096 12-bit imm *2 is 8192 (-4096..4094) +;;; NOTE : For brcc we generate instructions such as +;;; if cc jmp; jump.[sl] offset +;;; offset of jump.[sl] is from the jump instruction but +;;; gcc calculates length from the if cc jmp instruction +;;; furthermore gcc takes the end address of the branch instruction +;;; as (pc) for a forward branch +;;; hence our range is (-4094, 4092) instead of (-4096, 4094) for a br +;;; +;;; The way the (pc) rtx works in these calculations is somewhat odd; +;;; for backward branches it's the address of the current instruction, +;;; for forward branches it's the previously known address of the following +;;; instruction - we have to take this into account by reducing the range +;;; for a forward branch. + +;; Lengths for type "mvi" insns are always defined by the instructions +;; themselves. +(define_attr "length" "" + (cond [(eq_attr "type" "mcld") + (if_then_else (match_operand 1 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "mcst") + (if_then_else (match_operand 0 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "move") (const_int 2) + + (eq_attr "type" "dsp32") (const_int 4) + (eq_attr "type" "dsp32shiftimm") (const_int 4) + (eq_attr "type" "call") (const_int 4) + + (eq_attr "type" "br") + (if_then_else (and + (le (minus (match_dup 0) (pc)) (const_int 4092)) + (ge (minus (match_dup 0) (pc)) (const_int -4096))) + (const_int 2) + (const_int 4)) + + (eq_attr "type" "brcc") + (cond [(and + (le (minus (match_dup 3) (pc)) (const_int 1020)) + (ge (minus (match_dup 3) (pc)) (const_int -1024))) + (const_int 2) + (and + (le (minus (match_dup 3) (pc)) (const_int 4092)) + (ge (minus (match_dup 3) (pc)) (const_int -4094))) + (const_int 4)] + (const_int 6)) + ] + + (const_int 2))) + +;; Classify the insns into those that are one instruction and those that +;; are more than one in sequence. +(define_attr "seq_insns" "single,multi" + (const_string "single")) + +;; Describe a user's asm statement. 
+(define_asm_attributes + [(set_attr "type" "misc") + (set_attr "seq_insns" "multi") + (set_attr "length" "4")]) + +;; Conditional moves + +(define_mode_iterator CCMOV [QI HI SI]) + +(define_expand "movcc" + [(set (match_operand:CCMOV 0 "register_operand" "") + (if_then_else:CCMOV (match_operand 1 "comparison_operator" "") + (match_operand:CCMOV 2 "register_operand" "") + (match_operand:CCMOV 3 "register_operand" "")))] + "" +{ + operands[1] = bfin_gen_compare (operands[1], mode); +}) + +(define_insn "*movcc_insn1" + [(set (match_operand:CCMOV 0 "register_operand" "=da,da,da") + (if_then_else:CCMOV + (eq:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:CCMOV 1 "register_operand" "da,0,da") + (match_operand:CCMOV 2 "register_operand" "0,da,da")))] + "" + "@ + if !cc %0 = %1; + if cc %0 = %2; + if !cc %0 = %1; if cc %0 = %2;" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +(define_insn "*movcc_insn2" + [(set (match_operand:CCMOV 0 "register_operand" "=da,da,da") + (if_then_else:CCMOV + (ne:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:CCMOV 1 "register_operand" "0,da,da") + (match_operand:CCMOV 2 "register_operand" "da,0,da")))] + "" + "@ + if !cc %0 = %2; + if cc %0 = %1; + if cc %0 = %1; if !cc %0 = %2;" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +;; Insns to load HIGH and LO_SUM + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movstricthi_high" + [(set (match_operand:SI 0 "register_operand" "+x") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "reload_completed" + "%h0 = %h2;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_high_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%d0 = %1@GOT_LOW;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%h0 = %h2@GOT_HIGH;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;;; Move instructions + +(define_insn_and_split "movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DI 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + 
+(define_insn "movbi" + [(set (match_operand:BI 0 "nonimmediate_operand" "=x,x,d,md,C,d,C,P1") + (match_operand:BI 1 "general_operand" "x,xKs3,md,d,d,C,P0,P1"))] + + "" + "@ + %0 = %1; + %0 = %1 (X); + %0 = B %1 (Z)%! + B %0 = %1; + CC = %1; + %0 = CC; + CC = R0 < R0; + CC = R0 == R0;" + [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,compare,compare") + (set_attr "length" "2,2,*,*,2,2,2,2") + (set_attr "seq_insns" "*,*,*,*,*,*,*,*")]) + +(define_insn "movpdi" + [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e") + (match_operand:PDI 1 "general_operand" " e,e,>"))] + "" + "@ + %0 = %1; + %0 = %x1; %0 = %w1; + %w0 = %1; %x0 = %1;" + [(set_attr "type" "move,mcst,mcld") + (set_attr "seq_insns" "*,multi,multi")]) + +(define_insn "load_accumulator" + [(set (match_operand:PDI 0 "register_operand" "=e") + (sign_extend:PDI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = %1;" + [(set_attr "type" "move")]) + +(define_insn_and_split "load_accumulator_pair" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (sign_extend:V2PDI (vec_concat:V2SI + (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d"))))] + "" + "#" + "reload_completed" + [(set (match_dup 3) (sign_extend:PDI (match_dup 1))) + (set (match_dup 4) (sign_extend:PDI (match_dup 2)))] +{ + operands[3] = gen_rtx_REG (PDImode, REGNO (operands[0])); + operands[4] = gen_rtx_REG (PDImode, REGNO (operands[0]) + 1); +}) + +(define_insn "*pushsi_insn" + [(set (mem:SI (pre_dec:SI (reg:SI REG_SP))) + (match_operand:SI 0 "register_operand" "xy"))] + "" + "[--SP] = %0;" + [(set_attr "type" "mcst") + (set_attr "addrtype" "32bit") + (set_attr "length" "2")]) + +(define_insn "*popsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,xy") + (mem:SI (post_inc:SI (reg:SI REG_SP))))] + "" + "%0 = [SP++]%!" + [(set_attr "type" "mcld") + (set_attr "addrtype" "preg,32bit") + (set_attr "length" "2")]) + +;; The first alternative is used to make reload choose a limited register +;; class when faced with a movsi_insn that had its input operand replaced +;; with a PLUS. We generally require fewer secondary reloads this way. + +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x,da,y,da,x,x,x,da,mr") + (match_operand:SI 1 "general_operand" "da,x,y,da,xKs7,xKsh,xKuh,ix,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = %1 (Z); + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst") + (set_attr "length" "2,2,2,2,2,4,4,*,*,*")]) + +(define_insn "*movsi_insn32" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "d,P0")] UNSPEC_32BIT))] + "" + "@ + %0 = ROT %1 BY 0%! + %0 = %0 -|- %0%!" 
+ [(set_attr "type" "dsp32shiftimm,dsp32")]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (const_int 0))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(const_int 0)] UNSPEC_32BIT))]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (match_operand:SI 1 "d_register_operand" ""))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_32BIT))]) + +(define_insn_and_split "*movv2hi_insn" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm") + (match_operand:V2HI 1 "general_operand" "i,di,md,d"))] + + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + # + %0 = %1; + %0 = %1%! + %0 = %1%!" + "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" + [(set (match_dup 0) (high:SI (match_dup 2))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 3)))] +{ + HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; + intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; + + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[2] = operands[3] = GEN_INT (trunc_int_for_mode (intval, SImode)); +} + [(set_attr "type" "move,move,mcld,mcst") + (set_attr "length" "2,2,*,*")]) + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:HI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" +{ + static const char *templates[] = { + "%0 = %1;", + "%0 = %1 (X);", + "%0 = %1 (X);", + "%0 = W %1 (X)%!", + "W %0 = %1%!", + "%h0 = W %1%!", + "W %0 = %h1%!" + }; + int alt = which_alternative; + rtx mem = (MEM_P (operands[0]) ? operands[0] + : MEM_P (operands[1]) ? operands[1] : NULL_RTX); + if (mem && bfin_dsp_memref_p (mem)) + alt += 2; + return templates[alt]; +} + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:QI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = B %1 (X)%! + B %0 = %1%!" + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,da,mr") + (match_operand:SF 1 "general_operand" "x,Fx,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,*,mcld,mcst")]) + +(define_insn_and_split "movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DF 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + +;; Storing halfwords. 
+(define_insn "*movsi_insv" + [(set (zero_extract:SI (match_operand 0 "register_operand" "+d,x") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "nonmemory_operand" "d,n"))] + "" + "@ + %d0 = %h1 << 0%! + %d0 = %1;" + [(set_attr "type" "dsp32shiftimm,mvi")]) + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "nonmemory_operand" ""))] + "" +{ + if (INTVAL (operands[1]) != 16 || INTVAL (operands[2]) != 16) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[0], VOIDmode)) + FAIL; +}) + +;; This is the main "hook" for PIC code. When generating +;; PIC, movsi is responsible for determining when the source address +;; needs PIC relocation and appropriately calling legitimize_pic_address +;; to perform the actual relocation. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (expand_move (operands, SImode)) + DONE; +}) + +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "") + (match_operand:V2HI 1 "general_operand" ""))] + "" + "expand_move (operands, V2HImode);") + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "expand_move (operands, DImode);") + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "expand_move (operands, SFmode);") + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "expand_move (operands, DFmode);") + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "expand_move (operands, HImode);") + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " expand_move (operands, QImode); ") + +;; Some define_splits to break up SI/SFmode loads of immediate constants. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_or_const_operand" ""))] + "reload_completed + /* Always split symbolic operands; split integer constants that are + too large for a single instruction. */ + && (GET_CODE (operands[1]) != CONST_INT + || (INTVAL (operands[1]) < -32768 + || INTVAL (operands[1]) >= 65536 + || (INTVAL (operands[1]) >= 32768 && PREG_P (operands[0]))))" + [(set (match_dup 0) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))] +{ + if (GET_CODE (operands[1]) == CONST_INT + && split_load_immediate (operands)) + DONE; + /* ??? Do something about TARGET_LOW_64K. 
*/ +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "reload_completed" + [(set (match_dup 2) (high:SI (match_dup 3))) + (set (match_dup 2) (lo_sum:SI (match_dup 2) (match_dup 3)))] +{ + long values; + REAL_VALUE_TYPE value; + + gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + + operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (values, SImode)); + if (values >= -32768 && values < 65536) + { + emit_move_insn (operands[2], operands[3]); + DONE; + } + if (split_load_immediate (operands + 2)) + DONE; +}) + +;; Sadly, this can't be a proper named movstrict pattern, since the compiler +;; expects to be able to use registers for operand 1. +;; Note that the asm instruction is defined by the manual to take an unsigned +;; constant, but it doesn't matter to the assembler, and the compiler only +;; deals with sign-extended constants. Hence "Ksh". +(define_insn "movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+x")) + (match_operand:HI 1 "immediate_operand" "Ksh"))] + "" + "%h0 = %1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;; Sign and zero extensions + +(define_insn_and_split "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (X); + %0 = W %h1 (X)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (Z); + %0 = W %h1 (Z)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn "zero_extendbisi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extend:SI (match_operand:BI 1 "nonimmediate_operand" "C")))] + "" + "%0 = %1;" + [(set_attr "type" "compare")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! + %0 = %T1 (Z);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! 
+ %0 = %T1 (Z);"
+ [(set_attr "type" "mcld,alu0")])
+
+;; DImode logical operations
+
+(define_code_iterator any_logical [and ior xor])
+(define_code_attr optab [(and "and")
+ (ior "ior")
+ (xor "xor")])
+(define_code_attr op [(and "&")
+ (ior "|")
+ (xor "^")])
+(define_code_attr high_result [(and "0")
+ (ior "%H1")
+ (xor "%H1")])
+
+;; Keep this pattern around to avoid generating NO_CONFLICT blocks.
+(define_expand "<optab>di3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (any_logical:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "general_operand" "d")))]
+ ""
+{
+ rtx hi_half[3], lo_half[3];
+ enum insn_code icode = CODE_FOR_<optab>si3;
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ emit_clobber (operands[0]);
+ split_di (operands, 3, lo_half, hi_half);
+ if (!(*insn_data[icode].operand[2].predicate) (lo_half[2], SImode))
+ lo_half[2] = force_reg (SImode, lo_half[2]);
+ emit_insn (GEN_FCN (icode) (lo_half[0], lo_half[1], lo_half[2]));
+ if (!(*insn_data[icode].operand[2].predicate) (hi_half[2], SImode))
+ hi_half[2] = force_reg (SImode, hi_half[2]);
+ emit_insn (GEN_FCN (icode) (hi_half[0], hi_half[1], hi_half[2]));
+ DONE;
+})
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "%0 = %T1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = %h1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn_and_split "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ emit_move_insn (operands[2], operands[1]);
+})
+
+(define_insn_and_split "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+(define_insn_and_split "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+;; DImode arithmetic operations
+
+(define_insn "add_with_carry"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,d")
+ (match_operand:SI 2 "nonmemory_operand" "Ks7,d")))
+ (set (match_operand:BI 3 "register_operand" "=C,C")
+ (ltu:BI (not:SI (match_dup 1)) (match_dup 2)))]
+ ""
+ "@
+ %0 += %2; cc = ac0;
+ %0 = %1 + %2; cc = ac0;"
+ [(set_attr "type" "alu0")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn
"sub_with_carry" + [(set (match_operand:SI 0 "register_operand" "=d") + (minus:SI (match_operand:SI 1 "register_operand" "%d") + (match_operand:SI 2 "nonmemory_operand" "d"))) + (set (match_operand:BI 3 "register_operand" "=C") + (leu:BI (match_dup 2) (match_dup 1)))] + "" + "%0 = %1 - %2; cc = ac0;" + [(set_attr "type" "alu0") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!register_operand (xops[4], SImode) + && (GET_CODE (xops[4]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[4]))) + xops[4] = force_reg (SImode, xops[4]); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_add_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + if (!register_operand (xops[5], SImode) + && (GET_CODE (xops[5]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[5]))) + xops[5] = force_reg (SImode, xops[5]); + if (xops[5] != const0_rtx) + emit_insn (gen_addsi3 (xops[1], xops[3], xops[5])); + else + emit_move_insn (xops[1], xops[3]); + emit_insn (gen_addsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_sub_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_notbi (xops[7], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + emit_insn (gen_subsi3 (xops[1], xops[3], xops[5])); + emit_insn (gen_subsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +;; Combined shift/add instructions + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a,d") + (ashift:SI (plus:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "register_operand" "a,d")) + (match_operand:SI 3 "pos_scale_operand" "P1P2,P1P2")))] + "" + "%0 = (%0 + %2) << %3;" /* "shadd %0,%2,%3;" */ + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (mult:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "scale_by_operand" "i"))))] + "" + "%0 = %1 + (%2 << 
%X3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "pos_scale_operand" "i"))))] + "" + "%0 = %1 + (%2 << %3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "scale_by_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %X2);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "pos_scale_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %2);" + [(set_attr "type" "alu0")]) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (FU)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi3" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "W")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "W"))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; The alternative involving IREGS requires that the corresponding L register +;; is zero. + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=ad,a,d,b") + (plus:SI (match_operand:SI 1 "register_operand" "%0, a,d,0") + (match_operand:SI 2 "reg_or_7bit_operand" "Ks7, a,d,fP2P4")))] + "" + "@ + %0 += %2; + %0 = %1 + %2; + %0 = %1 + %2; + %0 += %2;" + [(set_attr "type" "alu0") + (set_attr "length" "2,2,2,2")]) + +(define_insn "ssaddsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 + %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=da,d,a") + (minus:SI (match_operand:SI 1 "register_operand" "0,d,0") + (match_operand:SI 2 "reg_or_neg7bit_operand" "KN7,d,a")))] + "" +{ + static const char *const strings_subsi3[] = { + "%0 += -%2;", + "%0 = %1 - %2;", + "%0 -= %2;", + }; + + if (CONSTANT_P (operands[2]) && INTVAL (operands[2]) < 0) { + rtx tmp_op = operands[2]; + operands[2] = GEN_INT (-INTVAL (operands[2])); + output_asm_insn ("%0 += %2;", operands); + operands[2] = tmp_op; + return ""; + } + + return strings_subsi3[which_alternative]; +} + [(set_attr "type" "alu0")]) + +(define_insn "sssubsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_minus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 - %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +;; Accumulator addition + +(define_insn "addpdi3" + [(set (match_operand:PDI 0 "register_operand" "=A") + (ss_plus:PDI (match_operand:PDI 1 "register_operand" "%0") + (match_operand:PDI 2 "nonmemory_operand" "B")))] + "" + "A0 += A1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sum_of_accumulators" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_truncate:SI + (ss_plus:PDI (match_operand:PDI 2 "register_operand" "1") + (match_operand:PDI 3 "register_operand" "B")))) + (set (match_operand:PDI 1 "register_operand" "=A") + (ss_plus:PDI (match_dup 2) (match_dup 3)))] + "" + "%0 = (A0 += A1)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "us_truncpdisi2" + [(set (match_operand:SI 0 "register_operand" "=D,W") + (us_truncate:SI (match_operand:PDI 1 "register_operand" "A,B")))] + "" + "%0 = %1 (FU)%!" + [(set_attr "type" "dsp32")]) + +;; Bit test instructions + +(define_insn "*not_bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = !BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn "*bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn_and_split "*bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (ne:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn_and_split "*not_bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (not:SI (match_operand:SI 1 "register_operand" "d")) + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (eq:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn "*andsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,d,d") + (and:SI (match_operand:SI 1 "register_operand" "%0,d,d,d") + (match_operand:SI 2 "rhs_andsi3_operand" "L,M1,M2,d")))] + "" + "@ + BITCLR (%0,%Y2); + %0 = %T1 (Z); + %0 = %h1 (Z); + %0 = %1 & %2;" + [(set_attr "type" "alu0")]) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (highbits_operand (operands[2], SImode)) + { + operands[2] = GEN_INT (exact_log2 (-INTVAL (operands[2]))); + emit_insn (gen_ashrsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + DONE; + } + if (! 
rhs_andsi3_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ior:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITSET (%0, %X2); + %0 = %1 | %2;" + [(set_attr "type" "alu0")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (xor:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITTGL (%0, %X2); + %0 = %1 ^ %2;" + [(set_attr "type" "alu0")]) + +(define_insn "ones" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_ONES))] + "" + "%h0 = ONES %1;" + [(set_attr "type" "alu0")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smax:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smin:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssnegsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (not:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = ~%1;" + [(set_attr "type" "alu0")]) + +(define_expand "clrsbsi2" + [(set (match_dup 2) + (truncate:HI (clrsb:SI (match_operand:SI 1 "register_operand" "d")))) + (set (match_operand:SI 0 "register_operand") + (zero_extend:SI (match_dup 2)))] + "" +{ + operands[2] = gen_reg_rtx (HImode); +}) + +(define_insn "signbitssi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI (clrsb:SI (match_operand:SI 1 "register_operand" "d"))))] + "" + "%h0 = signbits %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssroundsi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI + (lshiftrt:SI (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (const_int 32768)) + (const_int 16))))] + "" + "%h0 = %1 (RND)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smax:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smin:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2) (V)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "abshi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (abs:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = abs %1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssneghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "clrsbhi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (clrsb:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%h0 = signbits %h1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 *= %2;" + [(set_attr "type" "mult")]) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_FU), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_addpdi3 (a0reg, a0reg, a1reg)); + emit_insn (gen_us_truncpdisi2 (operands[0], a0reg)); + } + else + { + rtx umulsi3_highpart_libfunc + = init_one_libfunc ("__umulsi3_highpart"); + + emit_library_call_value (umulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_IS), + GEN_INT (MACFLAG_IS_M))); + emit_insn 
(gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg)); + } + else + { + rtx smulsi3_highpart_libfunc + = init_one_libfunc ("__smulsi3_highpart"); + + emit_library_call_value (smulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } +}) + +(define_insn_and_split "*ashlsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,a,a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "0,d,a,a,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1,P2,?P3P4")))] + "" + "@ + %0 <<= %2; + %0 = %1 << %2%! + %0 = %1 + %1; + %0 = %1 << %2; + #" + "PREG_P (operands[0]) && INTVAL (operands[2]) > 2" + [(set (match_dup 0) (ashift:SI (match_dup 1) (const_int 2))) + (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))] + "operands[3] = GEN_INT (INTVAL (operands[2]) - 2);" + [(set_attr "type" "shft,dsp32shiftimm,shft,shft,*")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,d") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5")))] + "" + "@ + %0 >>>= %2; + %0 = %1 >>> %2%!" + [(set_attr "type" "shft,dsp32shiftimm")]) + +(define_insn "rotl16" + [(set (match_operand:SI 0 "register_operand" "=d") + (rotate:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)))] + "" + "%0 = PACK (%h1, %d1)%!" + [(set_attr "type" "dsp32")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 16) + FAIL; +}) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotatert:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 16) + FAIL; + emit_insn (gen_rotl16 (operands[0], operands[1])); + DONE; +}) + + +(define_insn "ror_one" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (ashift:SI (zero_extend:SI (reg:BI REG_CC)) (const_int 31)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 1) (const_int 0)))] + "" + "%0 = ROT %1 BY -1%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "rol_one" + [(set (match_operand:SI 0 "register_operand" "+d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (zero_extend:SI (reg:BI REG_CC)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 31) (const_int 0)))] + "" + "%0 = ROT %1 BY 1%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_insn (gen_compare_lt (gen_rtx_REG (BImode, REG_CC), + hi_half[1], const0_rtx)); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_rol_one (lo_half[0], lo_half[0])); + emit_insn (gen_rol_one (hi_half[0], hi_half[0])); + DONE; +}) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,d,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1P2")))] + "" + "@ + %0 >>= %2; + %0 = %1 >> %2%! + %0 = %1 >> %2;" + [(set_attr "type" "shft,dsp32shiftimm,shft")]) + +(define_insn "lshrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (lshiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >> %2%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "ashrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (ashiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >>> %2%!" 
+ [(set_attr "type" "dsp32shiftimm")])
+
+;; A pattern to reload the equivalent of
+;; (set (Dreg) (plus (FP) (large_constant)))
+;; or
+;; (set (dagreg) (plus (FP) (arbitrary_constant)))
+;; using a scratch register
+(define_expand "reload_insi"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=w")
+ (match_operand:SI 1 "fp_plus_const_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))])]
+ ""
+{
+ rtx fp_op = XEXP (operands[1], 0);
+ rtx const_op = XEXP (operands[1], 1);
+ rtx primary = operands[0];
+ rtx scratch = operands[2];
+
+ emit_move_insn (scratch, const_op);
+ emit_insn (gen_addsi3 (scratch, scratch, fp_op));
+ emit_move_insn (primary, scratch);
+ DONE;
+})
+
+(define_mode_iterator AREG [PDI V2PDI])
+
+(define_insn "reload_in<mode>"
+ [(set (match_operand:AREG 0 "register_operand" "=e")
+ (match_operand:AREG 1 "memory_operand" "m"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ split_di (operands + 1, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %2;", xops);
+ output_asm_insn ("%w0 = %1;", xops);
+ output_asm_insn ("%1 = %3;", xops);
+ output_asm_insn ("%x0 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+(define_insn "reload_out<mode>"
+ [(set (match_operand:AREG 0 "memory_operand" "=m")
+ (match_operand:AREG 1 "register_operand" "e"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[1];
+ xops[1] = operands[2];
+ split_di (operands, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %w0;", xops);
+ output_asm_insn ("%2 = %1;", xops);
+ output_asm_insn ("%1 = %x0;", xops);
+ output_asm_insn ("%3 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+;; Jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+{
+ if (get_attr_length (insn) == 2)
+ return "jump.s %0;";
+ else
+ return "jump.l %0;";
+}
+ [(set_attr "type" "br")])
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "a"))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ /* In PIC mode, the table entries are stored PC relative.
+ Convert the relative address to an absolute address. */
+ if (flag_pic)
+ {
+ rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+
+ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0],
+ op1, NULL_RTX, 0, OPTAB_DIRECT);
+ }
+})
+
+(define_insn "*tablejump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+;; Hardware loop
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(parallel [(set (pc) (if_then_else
+ (ne (match_operand:SI 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+ (clobber (match_dup 2))])] ; match_scratch
+ ""
+{
+ /* The loop optimizer doesn't check the predicates...
*/ + if (GET_MODE (operands[0]) != SImode) + FAIL; + bfin_hardware_loop (); + operands[2] = gen_rtx_SCRATCH (SImode); +}) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0,0") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a*d,*b*v*f,m") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r,&r"))] + "" + "@ + /* loop end %0 %l1 */ + # + #" + [(set_attr "length" "6,10,14")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 2 "=&r"))] + "memory_operand (operands[0], SImode) || splitting_loops" + [(set (match_dup 2) (match_dup 0)) + (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0))) + (set (pc) + (if_then_else (eq (reg:BI REG_CC) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_insn "lsetup_with_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (set (match_operand:SI 4 "lc_register_operand" "=k") + (match_operand:SI 5 "register_operand" "a"))] + "" + "LSETUP (%1, %3) %4 = %5;" + [(set_attr "length" "4")]) + +(define_insn "lsetup_without_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (use (match_operand:SI 4 "lc_register_operand" "k"))] + "" + "LSETUP (%1, %3) %4;" + [(set_attr "length" "4")]) + +;; Call instructions.. + +;; The explicit MEM inside the UNSPEC prevents the compiler from moving +;; the load before a branch after a NULL test, or before a store that +;; initializes a function descriptor. 
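+;;
+;; As a minimal illustration (hypothetical user code, not from these
+;; sources), the situation being guarded against is an indirect call such
+;; as:
+;;
+;;   void maybe_call (void (*fn) (void))
+;;   {
+;;     if (fn)
+;;       fn ();   /* descriptor load must stay below the NULL test */
+;;   }
+;;
+;; If the load from the function descriptor were hoisted above the test,
+;; it could read through a NULL or not-yet-initialized descriptor; keeping
+;; the MEM inside the unspec_volatile until the insn is split after reload
+;; prevents that motion.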
+ +(define_insn_and_split "load_funcdescsi" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI [(mem:SI (match_operand:SI 1 "address_operand" "p"))] + UNSPEC_VOLATILE_LOAD_FUNCDESC))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (mem:SI (match_dup 1)))]) + +(define_expand "call" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 0); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (return)])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 1); + DONE; +}) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" ""))])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 0); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (return)])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 1); + DONE; +}) + +(define_insn "*call_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "a")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "z")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "a")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "z")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +;; Block move patterns + +;; We cheat. This copies one more word than operand 2 indicates. + +(define_insn "rep_movsi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 2))) + (const_int 4))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 2))) + (const_int 4))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_insn "rep_movhi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 1))) + (const_int 2))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 1))) + (const_int 2))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" +{ + if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3])) + DONE; + FAIL; +}) + +;; Conditional branch patterns +;; The Blackfin has only few condition codes: eq, lt, lte, ltu, leu + +(define_insn "compare_eq" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (eq:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1==%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_ne" + [(set (match_operand:BI 0 
"register_operand" "=C,C") + (ne:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "0" + "cc =%1!=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_lt" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (lt:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_le" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (le:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_leu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (leu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<=%2 (iu);" + [(set_attr "type" "compare")]) + +(define_insn "compare_ltu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (ltu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<%2 (iu);" + [(set_attr "type" "compare")]) + +;; Same as above, but and CC with the overflow bit generated by the first +;; multiplication. +(define_insn "flag_mul_macv2hi_parts_acconly_andcc0" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (reg:BI REG_CC) + (and:BI (reg:BI REG_CC) + (unspec:BI [(vec_select:HI (match_dup 2) (parallel [(match_dup 4)])) + (vec_select:HI (match_dup 3) (parallel [(match_dup 6)])) + (match_dup 10)] + UNSPEC_MUL_WITH_FLAG)))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;" }; + int alt = (INTVAL 
(operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "misc") + (set_attr "length" "6") + (set_attr "seq_insns" "multi")]) + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_const_int_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx bi_compare = bfin_gen_compare (operands[0], SImode); + emit_jump_insn (gen_cbranchbi4 (bi_compare, bfin_cc_rtx, CONST0_RTX (BImode), + operands[3])); + DONE; +}) + +(define_insn "cbranchbi4" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + asm_conditional_branch (insn, operands, 0, 0); + return ""; +} + [(set_attr "type" "brcc")]) + +;; Special cbranch patterns to deal with the speculative load problem - see +;; bfin_reorg for details. + +(define_insn "cbranch_predicted_taken" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(const_int 0)] UNSPEC_CBRANCH_TAKEN)] + "" +{ + asm_conditional_branch (insn, operands, 0, 1); + return ""; +} + [(set_attr "type" "brcc")]) + +(define_insn "cbranch_with_nops" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(match_operand 4 "immediate_operand" "")] UNSPEC_CBRANCH_NOPS)] + "reload_completed" +{ + asm_conditional_branch (insn, operands, INTVAL (operands[4]), 0); + return ""; +} + [(set_attr "type" "brcc") + (set_attr "length" "8")]) + +;; setcc insns. + +(define_expand "cstorebi4" + [(set (match_dup 4) + (match_operator:BI 1 "bfin_bimode_comparison_operator" + [(match_operand:BI 2 "register_operand" "") + (match_operand:BI 3 "reg_or_const_int_operand" "")])) + (set (match_operand:SI 0 "register_operand" "") + (ne:SI (match_dup 4) (const_int 0)))] + "" +{ + /* It could be expanded as a movbisi instruction, but the portable + alternative produces better code. 
*/ + if (GET_CODE (operands[1]) == NE) + FAIL; + + operands[4] = bfin_cc_rtx; +}) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_const_int_operand" "")]))] + "" +{ + rtx bi_compare, test; + + if (!bfin_direct_comparison_operator (operands[1], SImode)) + { + if (!register_operand (operands[3], SImode) + || GET_CODE (operands[1]) == NE) + FAIL; + test = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), + SImode, operands[3], operands[2]); + } + else + test = operands[1]; + + bi_compare = bfin_gen_compare (test, SImode); + gcc_assert (GET_CODE (bi_compare) == NE); + emit_insn (gen_movbisi (operands[0], bfin_cc_rtx)); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" + "nop;") + +;; A nop which stays there when emitted. +(define_insn "forced_nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "nop;") + +(define_insn "mnop" + [(unspec [(const_int 0)] UNSPEC_32BIT)] + "" + "mnop%!" + [(set_attr "type" "dsp32")]) + +;;;;;;;;;;;;;;;;;;;; CC2dreg ;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "movsibi" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (match_operand:SI 1 "register_operand" "d") + (const_int 0)))] + "" + "CC = %1;" + [(set_attr "length" "2")]) + +(define_insn_and_split "movbisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (ne:SI (match_operand:BI 1 "register_operand" "C") + (const_int 0)))] + "" + "#" + "" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:BI 1 "register_operand" "")))] + "") + +(define_insn "notbi" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (match_operand:BI 1 "register_operand" " 0") + (const_int 0)))] + "" + "%0 = ! %0;" /* NOT CC;" */ + [(set_attr "type" "compare")]) + +;; Vector and DSP insns + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 24)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 8))))] + "" + "%0 = ALIGN8(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 16))))] + "" + "%0 = ALIGN16(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 8)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 24))))] + "" + "%0 = ALIGN24(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +;; Prologue and epilogue. + +(define_expand "prologue" + [(const_int 1)] + "" + "bfin_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (1, 0, 0); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (0, 0, 1); DONE;") + +(define_expand "eh_return" + [(use (match_operand:SI 0 "register_operand" ""))] + "" +{ + emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0])); + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn "eh_store_handler" + [(unspec_volatile [(match_operand:SI 1 "register_operand" "da")] + UNSPEC_VOLATILE_STORE_EH_HANDLER) + (clobber (match_operand:SI 0 "memory_operand" "=m"))] + "" + "%0 = %1%!" 
+ [(set_attr "type" "mcst")]) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 1)] + "bfin_expand_epilogue (1, 1, 0); DONE;") + +(define_insn "link" + [(set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -4))) (reg:SI REG_RETS)) + (set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:SI REG_FP)) + (set (reg:SI REG_FP) + (plus:SI (reg:SI REG_SP) (const_int -8))) + (set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 0 "immediate_operand" "i")))] + "" + "LINK %Z0;" + [(set_attr "length" "4")]) + +(define_insn "unlink" + [(set (reg:SI REG_FP) (mem:SI (reg:SI REG_FP))) + (set (reg:SI REG_RETS) (mem:SI (plus:SI (reg:SI REG_FP) (const_int 4)))) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_FP) (const_int 8)))] + "" + "UNLINK;" + [(set_attr "length" "4")]) + +;; This pattern is slightly clumsy. The stack adjust must be the final SET in +;; the pattern, otherwise dwarf2out becomes very confused about which reg goes +;; where on the stack, since it goes through all elements of the parallel in +;; sequence. +(define_insn "push_multiple" + [(match_parallel 0 "push_multiple_operation" + [(unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_PUSH_MULTIPLE)])] + "" +{ + output_push_multiple (insn, operands); + return ""; +}) + +(define_insn "pop_multiple" + [(match_parallel 0 "pop_multiple_operation" + [(set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 1 "immediate_operand" "i")))])] + "" +{ + output_pop_multiple (insn, operands); + return ""; +}) + +(define_insn "return_internal" + [(return) + (use (match_operand 0 "register_operand" ""))] + "reload_completed" +{ + switch (REGNO (operands[0])) + { + case REG_RETX: + return "rtx;"; + case REG_RETN: + return "rtn;"; + case REG_RETI: + return "rti;"; + case REG_RETS: + return "rts;"; + } + gcc_unreachable (); +}) + +;; When used at a location where CC contains 1, causes a speculative load +;; that is later cancelled. This is used for certain workarounds in +;; interrupt handler prologues. +(define_insn "dummy_load" + [(unspec_volatile [(match_operand 0 "register_operand" "a") + (match_operand 1 "register_operand" "C")] + UNSPEC_VOLATILE_DUMMY)] + "" + "if cc jump 4;\n\tr7 = [%0];" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;; A placeholder insn inserted before the final scheduling pass. It is used +;; to improve scheduling of loads when workarounds for speculative loads are +;; needed, by not placing them in the first few cycles after a conditional +;; branch. 
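+;;
+;; As a rough illustration (schematic, not actual compiler output), the
+;; sequence being avoided is a load scheduled in the shadow of a
+;; conditional branch, e.g.
+;;
+;;   if cc jump .Lskip;
+;;   r0 = [p0];
+;;
+;; On parts that need the workaround such a load can be issued
+;; speculatively even when the branch is taken, so "stall" markers keep
+;; the scheduler from placing loads in those slots.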
+(define_insn "stall" + [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")] + UNSPEC_VOLATILE_STALL)] + "" + "" + [(set_attr "type" "stall")]) + +(define_insn "csync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)] + "" + "csync;" + [(set_attr "type" "sync")]) + +(define_insn "ssync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_SSYNC)] + "" + "ssync;" + [(set_attr "type" "sync")]) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 3))] + "" + "excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "2")]) + +(define_insn "trapifcc" + [(trap_if (reg:BI REG_CC) (const_int 3))] + "" + "if !cc jump 4 (bp); excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;;; Vector instructions + +;; First, all sorts of move variants + +(define_insn "movhiv2hi_low" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (match_operand:HI 2 "register_operand" "d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "movhiv2hi_high" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (match_operand:HI 2 "register_operand" "d")))] + "" + "%d0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +;; No earlyclobber on alternative two since our sequence ought to be safe. +;; The order of operands is intentional to match the VDSP builtin (high word +;; is passed first). +(define_insn_and_split "composev2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d") + (vec_concat:V2HI (match_operand:HI 2 "register_operand" "0,d") + (match_operand:HI 1 "register_operand" "d,d")))] + "" + "@ + %d0 = %h1 << 0%! + #" + "reload_completed" + [(set (match_dup 0) + (vec_concat:V2HI + (vec_select:HI (match_dup 0) (parallel [(const_int 0)])) + (match_dup 1))) + (set (match_dup 0) + (vec_concat:V2HI + (match_dup 2) + (vec_select:HI (match_dup 0) (parallel [(const_int 1)]))))] + "" + [(set_attr "type" "dsp32shiftimm")]) + +; Like composev2hi, but operating on elements of V2HI vectors. +; Useful on its own, and as a combiner bridge for the multiply and +; mac patterns. +(define_insn "packv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d,d,d,d,d,d") + (vec_concat:V2HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "0,0,d,d,d,d,d,d") + (parallel [(match_operand 3 "const01_operand" "P0,P0,P0,P1,P0,P1,P0,P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,0,0,d,d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0,P1,P1,P1,P0,P0,P1,P1")]))))] + "" + "@ + %d0 = %h2 << 0%! + %d0 = %d2 << 0%! + %h0 = %h1 << 0%! + %h0 = %d1 << 0%! + %0 = PACK (%h2,%h1)%! + %0 = PACK (%h2,%d1)%! + %0 = PACK (%d2,%h1)%! + %0 = PACK (%d2,%d1)%!" + [(set_attr "type" "dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32,dsp32,dsp32,dsp32")]) + +(define_insn "movv2hi_hi" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,d,d") + (parallel [(match_operand 2 "const01_operand" "P0,P0,P1")])))] + "" + "@ + /* optimized out */ + %h0 = %h1 << 0%! + %h0 = %d1 << 0%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "movv2hi_hi_low" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "" + "") + +(define_expand "movv2hi_hi_high" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 1)])))] + "" + "") + +;; Unusual arithmetic operations on 16-bit registers. + +(define_code_iterator sp_or_sm [ss_plus ss_minus]) +(define_code_attr spm_string [(ss_plus "+") (ss_minus "-")]) +(define_code_attr spm_name [(ss_plus "add") (ss_minus "sub")]) + +(define_insn "sshi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")]))))] + "" +{ + const char *templates[] = { + "%h0 = %h1 %h2 (S)%!", + "%h0 = %d1 %h2 (S)%!", + "%h0 = %h1 %d2 (S)%!", + "%h0 = %d1 %d2 (S)%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_low_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])))))] + "" +{ + const char *templates[] = { + "%h0 = %h2 %h3 (S)%!", + "%h0 = %d2 %h3 (S)%!", + "%h0 = %h2 %d3 (S)%!", + "%h0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_high_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")]))) + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" +{ + const char *templates[] = { + "%d0 = %h2 %h3 (S)%!", + "%d0 = %d2 %h3 (S)%!", + "%d0 = %h2 %d3 (S)%!", + "%d0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; V2HI vector insns + +(define_insn "addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "sssubhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smin:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MIN (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smax:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MAX (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +;; Multiplications. + +;; The Blackfin allows a lot of different options, and we need many patterns to +;; cover most of the hardware's abilities. +;; There are a few simple patterns using MULT rtx codes, but most of them use +;; an unspec with a const_int operand that determines which flag to use in the +;; instruction. +;; There are variants for single and parallel multiplications. +;; There are variants which just use 16-bit lowparts as inputs, and variants +;; which allow the user to choose just which halves to use as input values. +;; There are variants which set D registers, variants which set accumulators, +;; variants which set both, some of them optionally using the accumulators as +;; inputs for multiply-accumulate operations. + +(define_insn "flag_mulhi" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhi_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2 %M5%!", + "%h0 = %d1 * %h2 %M5%!", + "%h0 = %h1 * %d2 %M5%!", + "%h0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi_parts" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 = %h1 * %h2 %M5%!", + "%0 = %d1 * %h2 %M5%!", + "%0 = %h1 * %d2 %M5%!", + "%0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Three alternatives here to cover all possible allocations: +;; 0. mac flag is usable only for accumulator 1 - use A1 and odd DREG +;; 1. mac flag is usable for accumulator 0 - use A0 and even DREG +;; 2. mac flag is usable in any accumulator - use A1 and odd DREG +;; Other patterns which don't have a DREG destination can collapse cases +;; 1 and 2 into one. +(define_insn "flag_machi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 2 "register_operand" "d,d,d") + (match_operand:HI 3 "register_operand" "d,d,d") + (match_operand 4 "register_operand" "1,1,1") + (match_operand 5 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 6 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%1 %b5 %h2 * %h3) %M6%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "register_operand" "0,0") + (match_operand 4 "const01_operand" "P0P1,P0P1") + (match_operand 5 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 %b4 %h1 * %h2 %M5%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d,d") + (parallel [(match_operand 3 "const01_operand" "P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1")])) + (match_operand:PDI 5 "register_operand" "0,0") + (match_operand 6 "const01_operand" "P0P1,P0P1") + (match_operand 7 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 %b6 %h1 * %h2 %M7%!", + "%0 %b6 %d1 * %h2 %M7%!", + "%0 %b6 %h1 * %d2 %M7%!", + "%0 %b6 %d1 * %d2 %M7%!" + }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinithi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 1 "register_operand" "d,d,d") + (match_operand:HI 2 "register_operand" "d,d,d") + (match_operand 3 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 4 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%4 = %h1 * %h2) %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1hi" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (mult:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %d2 %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A slightly complicated pattern. +;; Operand 0 is the halfword output; operand 11 is the accumulator output +;; Halfword inputs are operands 1 and 2; operands 3, 4, 5 and 6 specify which +;; parts of these 2x16 bit registers to use. +;; Operand 7 is the accumulator input. +;; Operands 8/9 specify whether low/high parts are mac (0) or msu (1) +;; Operand 10 is the macflag to be used. 
+(define_insn "flag_macv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 11 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7) (match_dup 8) (match_dup 9) (match_dup 10)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macv2hi_parts_acconly" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %d2 %M10%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Same as above, but initializing the accumulators and therefore a couple fewer +;; necessary operands. 
+(define_insn "flag_macinitv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 8 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1v2hi_parts" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 = %h1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %d2 %M7%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A mixture of multiply and multiply-accumulate for when we only want to +;; initialize one part. +(define_insn "flag_mul_macv2hi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5%!" }; + int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "dsp32")]) + + +(define_code_iterator s_or_u [sign_extend zero_extend]) +(define_code_attr su_optab [(sign_extend "mul") + (zero_extend "umul")]) +(define_code_attr su_modifier [(sign_extend "IS") + (zero_extend "FU")]) + +(define_insn "hisi_ll" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Additional variants for signed * unsigned multiply. + +(define_insn "usmulhisi_ull" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ulh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhl" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Parallel versions of these operations. First, normal signed or unsigned +;; multiplies. + +(define_insn "hisi_ll_lh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Special signed * unsigned variants. 
+ +(define_insn "usmulhisi_ll_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %d2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Vector neg/abs. + +(define_insn "ssnegv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_neg:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = - %1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabsv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = ABS %1 (V)%!" + [(set_attr "type" "dsp32")]) + +;; Shifts. + +(define_insn "ssashiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashiftsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,d") + (if_then_else:SI + (lt (match_operand:HI 2 "reg_or_const_int_operand" "d,Ku5,Ks5") (const_int 0)) + (ashiftrt:SI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:SI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (S)%! + %0 = %1 << %2 (S)%! + %0 = %1 >>> %N2 (S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" 
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +;; Load without alignment exception (masking off low bits) + +(define_insn "loadbytes" + [(set (match_operand:SI 0 "register_operand" "=d") + (mem:SI (and:SI (match_operand:SI 1 "register_operand" "b") + (const_int -4))))] + "" + "DISALGNEXCPT || %0 = [%1];" + [(set_attr "type" "mcld") + (set_attr "length" "8")]) + +(include "sync.md") diff --git a/gcc-4.9/gcc/config/bfin/bfin.opt b/gcc-4.9/gcc/config/bfin/bfin.opt new file mode 100644 index 000000000..b736d91f8 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.opt @@ -0,0 +1,118 @@ +; Options for the Blackfin port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/bfin/bfin-opts.h + +; Value of -mcpu=. +Variable +bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN + +; -msi-revision support. There are three special values: +; -1 -msi-revision=none. +; 0xffff -msi-revision=any. +Variable +int bfin_si_revision + +; The workarounds enabled. +Variable +unsigned int bfin_workarounds = 0 + +msim +Target RejectNegative +Use simulator runtime + +mcpu= +Target RejectNegative Joined +Specify the name of the target CPU + +momit-leaf-frame-pointer +Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Omit frame pointer for leaf functions + +mlow64k +Target Report Mask(LOW_64K) +Program is entirely located in low 64k of memory + +mcsync-anomaly +Target Report Var(bfin_csync_anomaly) Init(-1) +Work around a hardware anomaly by adding a number of NOPs before a +CSYNC or SSYNC instruction. + +mspecld-anomaly +Target Report Var(bfin_specld_anomaly) Init(-1) +Avoid speculative loads to work around a hardware anomaly. + +mid-shared-library +Target Report Mask(ID_SHARED_LIBRARY) +Enabled ID based shared library + +mleaf-id-shared-library +Target Report Mask(LEAF_ID_SHARED_LIBRARY) +Generate code that won't be linked against any other ID shared libraries, +but may be used as a shared library. + +mshared-library-id= +Target RejectNegative Joined UInteger Var(bfin_library_id) +ID of shared library to build + +msep-data +Target Report Mask(SEP_DATA) +Enable separate data segment + +mlong-calls +Target Report Mask(LONG_CALLS) +Avoid generating pc-relative calls; use indirection + +mfast-fp +Target Report Mask(FAST_FP) +Link with the fast floating-point library + +mfdpic +Target Report Mask(FDPIC) +Enable Function Descriptor PIC mode + +minline-plt +Target Report Mask(INLINE_PLT) +Enable inlining of PLT in function calls + +mstack-check-l1 +Target Report Mask(STACK_CHECK_L1) +Do stack checking using bounds in L1 scratch memory + +mmulticore +Target Report Mask(MULTICORE) +Enable multicore support + +mcorea +Target Report Mask(COREA) +Build for Core A + +mcoreb +Target Report Mask(COREB) +Build for Core B + +msdram +Target Report Mask(SDRAM) +Build for SDRAM + +micplb +Target Report Mask(ICPLB) +Assume ICPLBs are enabled at runtime. 
diff --git a/gcc-4.9/gcc/config/bfin/constraints.md b/gcc-4.9/gcc/config/bfin/constraints.md new file mode 100644 index 000000000..7cebbbff9 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/constraints.md @@ -0,0 +1,225 @@ +;; Constraint definitions for Blackfin +;; Copyright (C) 2008-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "a" "PREGS" + "A Pn register.") + +(define_register_constraint "d" "DREGS" + "A Rn register.") + +(define_register_constraint "z" "PREGS_CLOBBERED" + "A call clobbered Pn register.") + +(define_register_constraint "D" "EVEN_DREGS" + "An even-numbered Rn register.") + +(define_register_constraint "W" "ODD_DREGS" + "An odd-numbered Rn register.") + +(define_register_constraint "e" "AREGS" + "An accumulator register.") + +(define_register_constraint "A" "EVEN_AREGS" + "An even-numbered accumulator; A0.") + +(define_register_constraint "B" "ODD_AREGS" + "An odd-numbered accumulator; A1.") + +(define_register_constraint "b" "IREGS" + "An I register.") + +(define_register_constraint "v" "BREGS" + "A B register.") + +(define_register_constraint "f" "MREGS" + "An M register.") + +(define_register_constraint "c" "CIRCREGS" + "A register used for circular buffering, i.e. I, B, or L registers.") + +(define_register_constraint "C" "CCREGS" + "The CC register.") + +(define_register_constraint "t" "LT_REGS" + "LT0 or LT1.") + +(define_register_constraint "u" "LB_REGS" + "LB0 or LB1.") + +(define_register_constraint "k" "LC_REGS" + "LC0 or LC1.") + +(define_register_constraint "x" "MOST_REGS" + "Any R, P, B, M, I or L register.") + +(define_register_constraint "y" "PROLOGUE_REGS" + "Additional registers typically used only in prologues and epilogues: + RETS, RETN, RETI, RETX, RETE, ASTAT, SEQSTAT and USP.") + +(define_register_constraint "w" "NON_A_CC_REGS" + "Any register except accumulators or CC.") + +(define_register_constraint "Z" "FDPIC_REGS" + "@internal The FD-PIC GOT pointer; P3.") + +(define_register_constraint "Y" "FDPIC_FPTR_REGS" + "@internal The FD-PIC function pointer register; P1.") + +(define_register_constraint "q0" "D0REGS" + "The register R0.") + +(define_register_constraint "q1" "D1REGS" + "The register R1.") + +(define_register_constraint "q2" "D2REGS" + "The register R2.") + +(define_register_constraint "q3" "D3REGS" + "The register R3.") + +(define_register_constraint "q4" "D4REGS" + "The register R4.") + +(define_register_constraint "q5" "D5REGS" + "The register R5.") + +(define_register_constraint "q6" "D6REGS" + "The register R6.") + +(define_register_constraint "q7" "D7REGS" + "The register R7.") + +(define_register_constraint "qA" "P0REGS" + "The register P0.") + +;; Constant constraints. + +(define_constraint "J" + "A constant value of the form 2**N, where N 5-bit wide." 
+ (and (match_code "const_int") + (match_test "log2constp (ival)"))) + +(define_constraint "Ks3" + "A signed 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -4 && ival <= 3"))) + +(define_constraint "Ku3" + "An unsigned 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 7"))) + +(define_constraint "Ks4" + "A signed 4 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -8 && ival <= 7"))) + +(define_constraint "Ku4" + "An unsigned 4 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 15"))) + +(define_constraint "Ks5" + "A signed 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 15"))) + +(define_constraint "Ku5" + "An unsigned 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "Ks7" + "A signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -64 && ival <= 63"))) + +(define_constraint "KN7" + "A constant that when negated is a signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -63 && ival <= 64"))) + +(define_constraint "Ksh" + "A signed 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "Kuh" + "An unsigned 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "L" + "A constant value of the form ~(2**N)." + (and (match_code "const_int") + (match_test "log2constp (~ival)"))) + +(define_constraint "M1" + "An integer with the value 255." + (and (match_code "const_int") + (match_test "ival == 255"))) + +(define_constraint "M2" + "An integer with the value 65535." + (and (match_code "const_int") + (match_test "ival == 65535"))) + +(define_constraint "P0" + "An integer with the value 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "P1" + "An integer with the value 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "P2" + "An integer with the value 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "P3" + "An integer with the value 3." + (and (match_code "const_int") + (match_test "ival == 3"))) + +(define_constraint "P4" + "An integer with the value 4." + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "PA" + "An integer constant describing any macflag except variants involving M." + (and (match_code "const_int") + (match_test "ival != MACFLAG_M && ival != MACFLAG_IS_M"))) + +(define_constraint "PB" + "An integer constant describing any macflag involving M." + (and (match_code "const_int") + (match_test "ival == MACFLAG_M || ival == MACFLAG_IS_M"))) + + +;; Extra constraints + +(define_constraint "Q" + "A SYMBOL_REF." + (match_code "symbol_ref")) + diff --git a/gcc-4.9/gcc/config/bfin/elf.h b/gcc-4.9/gcc/config/bfin/elf.h new file mode 100644 index 000000000..7d6c97ada --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/elf.h @@ -0,0 +1,74 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "\ +%{msim:%{!shared:crt0%O%s}} \ +%{!msim:%{!mcpu=bf561*:%{!msdram:basiccrt%O%s} %{msdram:basiccrts%O%s};: \ + %{!msdram:basiccrt561%O%s} %{msdram:basiccrt561s%O%s}} \ + %{mcpu=bf561*:%{mmulticore:%{!mcorea:%{!mcoreb:basiccrt561b%O%s}}}}} \ +crti%O%s crtbegin%O%s crtlibid%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "--start-group -lc %{msim:-lsim}%{!msim:-lnosys} --end-group \ +%{!T*:%{!msim:%{!msdram: \ + %{mcpu=bf512*:-T bf512.ld%s}%{mcpu=bf514*:-T bf514.ld%s} \ + %{mcpu=bf516*:-T bf516.ld%s}%{mcpu=bf518*:-T bf518.ld%s} \ + %{mcpu=bf522*:-T bf522.ld%s}%{mcpu=bf523*:-T bf523.ld%s} \ + %{mcpu=bf524*:-T bf524.ld%s}%{mcpu=bf525*:-T bf525.ld%s} \ + %{mcpu=bf526*:-T bf526.ld%s}%{mcpu=bf527*:-T bf527.ld%s} \ + %{mcpu=bf531*:-T bf531.ld%s}%{mcpu=bf532*:-T bf532.ld%s} \ + %{mcpu=bf533*:-T bf533.ld%s}%{mcpu=bf534*:-T bf534.ld%s} \ + %{mcpu=bf536*:-T bf536.ld%s}%{mcpu=bf537*:-T bf537.ld%s} \ + %{mcpu=bf538*:-T bf538.ld%s}%{mcpu=bf539*:-T bf539.ld%s} \ + %{mcpu=bf542*:-T bf542.ld%s}%{mcpu=bf544*:-T bf544.ld%s} \ + %{mcpu=bf547*:-T bf547.ld%s}%{mcpu=bf548*:-T bf548.ld%s} \ + %{mcpu=bf549*:-T bf549.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bf561.ld%s} \ + %{mmulticore:%{mcorea:-T bf561a.ld%s}} \ + %{mmulticore:%{mcoreb:-T bf561b.ld%s}} \ + %{mmulticore:%{!mcorea:%{!mcoreb:-T bf561m.ld%s}}}} \ + %{mcpu=bf592*:-T bf592.ld%s} \ + %{!mcpu=*:%eno processor type specified for linking} \ + %{!mcpu=bf561*:-T bfin-common-sc.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bfin-common-sc.ld%s} \ + %{mmulticore:-T bfin-common-mc.ld%s}}}}}" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#ifdef __BFIN_FDPIC__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +asm (SECTION_OP); \ +asm ("P3 = [SP + 20];\n\tcall " USER_LABEL_PREFIX #FUNC ";"); \ +asm (TEXT_SECTION_ASM_OP); +#endif + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{mfdpic:-msim} %{mid-shared-library:-msim}" + +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc-4.9/gcc/config/bfin/linux.h b/gcc-4.9/gcc/config/bfin/linux.h new file mode 100644 index 000000000..7e2dd455f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/linux.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{!mno-fdpic:-mfdpic} -micplb", + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() GNU_USER_TARGET_OS_CPP_BUILTINS() + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} crtreloc.o%s \ + crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %{mfast-fp:-lbffastfp} %G %L %{static:--end-group} \ + %{!static:%{mfast-fp:-lbffastfp} %G}" + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{mfdpic: -m elf32bfinfd -z text} %{shared} %{pie} \ + %{static:-dn -Bstatic} \ + %{shared:-G -Bdynamic} \ + %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker /lib/ld-uClibc.so.0} \ + %{static}} -init __init -fini __fini" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 diff --git a/gcc-4.9/gcc/config/bfin/predicates.md b/gcc-4.9/gcc/config/bfin/predicates.md new file mode 100644 index 000000000..d73480f97 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/predicates.md @@ -0,0 +1,249 @@ +;; Predicate definitions for the Blackfin. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return nonzero iff OP is one of the integer constants 1 or 2. +(define_predicate "pos_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) + +;; Return nonzero iff OP is one of the integer constants 2 or 4. +(define_predicate "scale_by_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4"))) + +;; Return nonzero if OP is a constant that consists of two parts; lower +;; bits all zero and upper bits all ones. In this case, we can perform +;; an AND operation with a sequence of two shifts. Don't return nonzero +;; if the constant would be cheap to load. +(define_predicate "highbits_operand" + (and (match_code "const_int") + (match_test "log2constp (-INTVAL (op)) && !satisfies_constraint_Ks7 (op)"))) + +;; Return nonzero if OP is suitable as a right-hand side operand for an +;; andsi3 operation. +(define_predicate "rhs_andsi3_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (~INTVAL (op)) || INTVAL (op) == 255 || INTVAL (op) == 65535")))) + +;; Return nonzero if OP is a register or a constant with exactly one bit +;; set. 
+(define_predicate "regorlog2_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (INTVAL (op))")))) + +;; Return nonzero if OP is a register or an integer constant. +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "register_operand") + (match_code "const_int"))) + +(define_predicate "const01_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx || op == const1_rtx"))) + +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == const1_rtx"))) + +(define_predicate "const3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 3"))) + +(define_predicate "vec_shift_operand" + (ior (and (match_code "const_int") + (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15")) + (match_operand 0 "register_operand"))) + +;; Like register_operand, but make sure that hard regs have a valid mode. +(define_predicate "valid_reg_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (REGNO (op) < FIRST_PSEUDO_REGISTER) + return HARD_REGNO_MODE_OK (REGNO (op), mode); + return 1; +}) + +;; Return nonzero if OP is a D register. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op))"))) + +(define_predicate "p_register_operand" + (and (match_code "reg") + (match_test "P_REGNO_P (REGNO (op))"))) + +(define_predicate "dp_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op)) || P_REGNO_P (REGNO (op))"))) + +;; Return nonzero if OP is a LC register. +(define_predicate "lc_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1"))) + +;; Return nonzero if OP is a LT register. +(define_predicate "lt_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1"))) + +;; Return nonzero if OP is a LB register. +(define_predicate "lb_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1"))) + +;; Return nonzero if OP is a register or a 7-bit signed constant. +(define_predicate "reg_or_7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_Ks7 (op)")))) + +;; Return nonzero if OP is a register other than DREG and PREG. +(define_predicate "nondp_register_operand" + (match_operand 0 "register_operand") +{ + unsigned int regno; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + regno = REGNO (op); + return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno)); +}) + +;; Return nonzero if OP is a register other than DREG and PREG, or MEM. +(define_predicate "nondp_reg_or_memory_operand" + (ior (match_operand 0 "nondp_register_operand") + (match_operand 0 "memory_operand"))) + +;; Return nonzero if OP is a register or, when negated, a 7-bit signed +;; constant. +(define_predicate "reg_or_neg7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_KN7 (op)")))) + +;; Used for secondary reloads, this function returns 1 if OP is of the +;; form (plus (fp) (const_int)). 
+(define_predicate "fp_plus_const_operand" + (match_code "plus") +{ + rtx op1, op2; + + op1 = XEXP (op, 0); + op2 = XEXP (op, 1); + return (REG_P (op1) + && (REGNO (op1) == FRAME_POINTER_REGNUM + || REGNO (op1) == STACK_POINTER_REGNUM) + && GET_CODE (op2) == CONST_INT); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT")))) + +;; Returns 1 if OP is a plain constant or matched by symbolic_operand. +(define_predicate "symbolic_or_const_operand" + (ior (match_code "const_int,const_double") + (match_operand 0 "symbolic_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; True for any non-virtual or eliminable register. Used in places where +;; instantiation of such a register may cause the pattern to not be recognized. +(define_predicate "register_no_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return !(op == arg_pointer_rtx + || op == frame_pointer_rtx + || (REGNO (op) >= FIRST_PSEUDO_REGISTER + && REGNO (op) <= LAST_VIRTUAL_REGISTER)); +}) + +;; Test for an operator valid in a BImode conditional branch +(define_predicate "bfin_bimode_comparison_operator" + (match_code "eq,ne")) + +;; Test for an operator whose result is accessible with movbisi. +(define_predicate "bfin_direct_comparison_operator" + (match_code "eq,lt,le,leu,ltu")) + +;; The following three are used to compute the addrtype attribute. They return +;; true if passed a memory address usable for a 16-bit load or store using a +;; P or I register, respectively. If neither matches, we know we have a +;; 32-bit instruction. +;; We subdivide the P case into normal P registers, and SP/FP. We can assume +;; that speculative loads through SP and FP are no problem, so this has +;; an effect on the anomaly workaround code. 
+ +(define_predicate "mem_p_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx; +}) + +(define_predicate "mem_spfp_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return op == stack_pointer_rtx || op == frame_pointer_rtx; +}) + +(define_predicate "mem_i_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return IREG_P (op); +}) + +(define_predicate "push_multiple_operation" + (and (match_code "parallel") + (match_test "analyze_push_multiple_operation (op)"))) + +(define_predicate "pop_multiple_operation" + (and (match_code "parallel") + (match_test "analyze_pop_multiple_operation (op)"))) diff --git a/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh b/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh new file mode 100644 index 000000000..36a71b114 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This script takes the following arguments: +# +# - the target sysroot +# - the value of $(MULTILIB_MATCHES) +# - the value of $(MULTILIB_OPTIONS) +# +# It uses these arguments to construct a definition of SYSROOT_SUFFIX_SPEC, +# which it prints to the standard output. For each multilib directory FOO, +# the script checks whether $sysroot has a subdirectory FOO, and if so will +# use /FOO for all compatible command-line options. It will not add a +# suffix for /FOO's options otherwise. These suffixes are concatenated, +# with one subspec for each space-separated entry in $(MULTILIB_OPTIONS). +set -e +sysroot=$1 +matches=$2 +options=$3 + +# For each multilib option OPT, add to $substs a sed command of the +# form "-e 's/OPT/OPT/'". +substs="" +for option in `echo "$options" | tr '/' ' '` +do + substs="$substs -e 's/$option/$option/g'" +done + +# For each ALIAS=CANONICAL entry in $MULTILIB_MATCHES, look for sed +# arguments in $substs of the form "-e 's/CANONICAL/.../'". Replace +# such entries with "-e 's/CANONICAL/ALIAS|.../'". Both the ALIAS and +# CANONICAL parts of $MULTILIB_MATCHES use '?' to stand for '='. +# +# After this loop, a command of the form "echo FOO | eval sed $substs" +# will replace a canonical option FOO with a %{...}-style spec pattern. 
+for match in $matches +do + canonical=`echo "$match" | sed -e 's/=.*//' -e 's/?/=/g'` + alias=`echo "$match" | sed -e 's/.*=//' -e 's/?/=/g'` + substs=`echo "$substs" | sed -e "s,s/$canonical/,&$alias|,"` +done + +# Build up the final SYSROOT_SUFFIX_SPEC in $spec. +spec= +for combo in $options +do + # See which option alternatives in $combo have their own sysroot + # directory. Create a subspec of the form "%{PAT1:/DIR1;...;PATn:DIRn}" + # from each such option OPTi, where DIRi is the directory associated + # with OPTi and PATi is the result of passing OPTi through $substs. + subspec= + for option in `echo "$combo" | tr '/' ' '` + do + dir=`echo "$option" | sed 's/mcpu=//'` + if test -d "$sysroot/$dir"; then + test -z "$subspec" || subspec="$subspec;" + subspec="$subspec"`echo "$option" | eval sed $substs`":/$dir" + fi + done + # Concatenate all the subspecs. + test -z "$subspec" || spec="$spec%{$subspec}" +done +if test -n "$spec"; then + echo "#undef SYSROOT_SUFFIX_SPEC" + echo "#define SYSROOT_SUFFIX_SPEC \"$spec\"" +fi diff --git a/gcc-4.9/gcc/config/bfin/rtems.h b/gcc-4.9/gcc/config/bfin/rtems.h new file mode 100644 index 000000000..0897e26b8 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/rtems.h @@ -0,0 +1,28 @@ +/* Definitions for rtems targeting a bfin + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Ralf Corsépius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target OS preprocessor built-ins. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_assert ("system=rtems"); \ + } \ + while (0) diff --git a/gcc-4.9/gcc/config/bfin/sync.md b/gcc-4.9/gcc/config/bfin/sync.md new file mode 100644 index 000000000..62a87b7e1 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/sync.md @@ -0,0 +1,178 @@ +;; GCC machine description for Blackfin synchronization instructions. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
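The call-based synchronization patterns that follow rely on the Blackfin/uClinux "fixed code" area: the kernel exposes small atomic helper routines at fixed user-space addresses, and the fetchop_addr values defined below (1072-1136, i.e. 0x430-0x470) together with the GEN_INT (0x420) used by the compare-and-swap expander pick the helper to call. As a rough, hedged illustration of how C code reaches these patterns, the sketch below uses the generic GCC __sync builtins; the function and variable names are hypothetical, and the addresses in the comments are assumptions derived from those constants, not a statement about any particular kernel build.

#include <stdint.h>

static int32_t counter;

/* Fetch-and-add: expected to expand through the sync_<fetchop_name>si
   patterns below, i.e. a short call to a kernel-provided helper
   (assumed to live at 0x430 for the "add" variant).  */
int32_t
bump_counter (int32_t n)
{
  return __sync_fetch_and_add (&counter, n);
}

/* Compare-and-swap: expected to expand through sync_compare_and_swapsi,
   i.e. a call through a pointer register loaded with 0x420; returns the
   value that was previously stored in counter.  */
int32_t
swap_if_equal (int32_t expected, int32_t desired)
{
  return __sync_val_compare_and_swap (&counter, expected, desired);
}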
+ +(define_code_iterator FETCHOP [plus minus ior and xor]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (and "and") (xor "xor")]) +(define_code_attr fetchop_addr + [(plus "1072") (minus "1088") (ior "1104") (and "1120") (xor "1136")]) + +(define_insn "sync_<fetchop_name>si_internal" + [(set (mem:SI (match_operand:SI 0 "register_operand" "qA")) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 0)) + (match_operand:SI 1 "register_operand" "q0")) + (match_operand:SI 2 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=q0")) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%2);" + [(set_attr "type" "call")]) + +(define_expand "sync_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "+m") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "register_operand" "q0")) + (match_dup 2)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[0], 0))) + { + operands[0] = shallow_copy_rtx (operands[0]); + XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); + } + operands[2] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_old_<fetchop_name>si_internal" + [(set (match_operand:SI 0 "register_operand" "=q1") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q0")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_old_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_new_<fetchop_name>si_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (unspec:SI + [(FETCHOP:SI + (mem:SI (match_operand:SI 1 "register_operand" "qA")) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_new_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(FETCHOP:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg
(Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_compare_and_swapsi_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q1") + (match_operand:SI 3 "register_operand" "q2") + (match_operand:SI 4 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%4);" + [(set_attr "type" "call")]) + +(define_expand "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(match_dup 1) + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "") + (match_dup 4)] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[4] = force_reg (Pmode, GEN_INT (0x420)); +}) diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-elf b/gcc-4.9/gcc/config/bfin/t-bfin-elf new file mode 100644 index 000000000..1e08c98b7 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-elf @@ -0,0 +1,49 @@ +# Copyright (C) 2005-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data/mfdpic mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mfdpic mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*mfdpic/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-linux b/gcc-4.9/gcc/config/bfin/t-bfin-linux new file mode 100644 index 000000000..8726d8fd6 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-linux @@ -0,0 +1,52 @@ +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
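The t-bfin-linux fragment below ends with a rule that runs print-sysroot-suffix.sh over MULTILIB_MATCHES and MULTILIB_OPTIONS to generate linux-sysroot-suffix.h. As a hedged sketch only, assuming the target sysroot contains a bf532-none subdirectory, the generated header could look roughly like the following; the real alternation lists every alias from MULTILIB_MATCHES and is abbreviated here, and if no such subdirectory exists the script emits nothing and the default sysroot is used unchanged.

/* Hypothetical output of print-sysroot-suffix.sh for a sysroot that
   provides a bf532-none subdirectory (abbreviated alias list).  */
#undef SYSROOT_SUFFIX_SPEC
#define SYSROOT_SUFFIX_SPEC \
  "%{mcpu=bf512-none|mcpu=bf561-none|mcpu=bf592-none|mcpu=bf532-none:/bf532-none}"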
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_DIRNAMES=bf532-none + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +# This rule uses MULTILIB_MATCHES to generate a definition of +# SYSROOT_SUFFIX_SPEC. +linux-sysroot-suffix.h: $(srcdir)/config/bfin/print-sysroot-suffix.sh + $(SHELL) $(srcdir)/config/bfin/print-sysroot-suffix.sh \ + "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \ + "$(MULTILIB_OPTIONS)" > $@ + +generated_files += linux-sysroot-suffix.h diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-uclinux b/gcc-4.9/gcc/config/bfin/t-bfin-uclinux new file mode 100644 index 000000000..158ca3766 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-uclinux @@ -0,0 +1,48 @@ +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* diff --git a/gcc-4.9/gcc/config/bfin/t-rtems b/gcc-4.9/gcc/config/bfin/t-rtems new file mode 100644 index 000000000..728ab1c4f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-rtems @@ -0,0 +1,6 @@ +# Multilibs for fbin RTEMS targets. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXTRA_OPTS = +MULTILIB_EXCEPTIONS = diff --git a/gcc-4.9/gcc/config/bfin/uclinux.h b/gcc-4.9/gcc/config/bfin/uclinux.h new file mode 100644 index 000000000..0ae03b28e --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/uclinux.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: crt1%O%s} crti%O%s crtbegin%O%s crtlibid%O%s" + +#define TARGET_OS_CPP_BUILTINS() GNU_USER_TARGET_OS_CPP_BUILTINS() + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \ +" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 + +#define SUBTARGET_FDPIC_NOT_SUPPORTED diff --git a/gcc-4.9/gcc/config/c6x/c6x-isas.def b/gcc-4.9/gcc/config/c6x/c6x-isas.def new file mode 100644 index 000000000..1447a5d14 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-isas.def @@ -0,0 +1,37 @@ +/* C6X ISA names. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Define ISAs for the -march option, used both in C6X.c and to + generate c6x-tables.opt. Before including this file, define a + macro: + + C6X_ISA (NAME, ENUM_VALUE, FLAGS) + + where NAME is the name for use with -march=, ENUM_VALUE is an enum + corresponding to this arch, and FLAGS is a combination of flags + that together specify the available instructions. */ + +C6X_ISA("c62x", C6X_CPU_C62X, C6X_INSNS_C62X) +C6X_ISA("c64x", C6X_CPU_C64X, C6X_INSNS_C62X | C6X_INSNS_C64X) +C6X_ISA("c64x+", C6X_CPU_C64XP, C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP) +C6X_ISA("c67x", C6X_CPU_C67X, C6X_INSNS_C62X | C6X_INSNS_C67X) +C6X_ISA("c67x+", C6X_CPU_C67XP, C6X_INSNS_C62X | C6X_INSNS_C67X | C6X_INSNS_C67XP) +C6X_ISA("c674x", C6X_CPU_C674X, + (C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP | C6X_INSNS_C67X + | C6X_INSNS_C67XP | C6X_INSNS_C674X)) diff --git a/gcc-4.9/gcc/config/c6x/c6x-modes.def b/gcc-4.9/gcc/config/c6x/c6x-modes.def new file mode 100644 index 000000000..a438e2808 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-modes.def @@ -0,0 +1,24 @@ +/* Definitions of target machine for GNU compiler, for TI C6x. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ + +VECTOR_MODE (FRACT, SQ, 2); /* V2SQ. */ +VECTOR_MODE (FRACT, HQ, 2); /* V2HQ. 
*/ diff --git a/gcc-4.9/gcc/config/c6x/c6x-mult.md b/gcc-4.9/gcc/config/c6x/c6x-mult.md new file mode 100644 index 000000000..d8e262652 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-mult.md @@ -0,0 +1,844 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically from c6x-mult.md.in by genmult.sh +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2" + [(set (match_operand:SI 0 "register_operand" "=a,a,b,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV))] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. 
+ +(define_insn "avgv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG))] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG))] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3" + [(set (match_operand:HI 0 "register_operand" "=a,b,a,b") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5")))] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const" + [(set (match_operand:SI 0 "register_operand" "=a,b,ab") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5")))] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + 
(set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const" + [(set (match_operand:SI 0 "register_operand" "=a,b,ab") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5")))] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5"))))] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a")))] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" 
"=a,b,a,b") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a"))))] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3" + [(set (match_operand:DI 0 "register_operand" "=a,b,a,b") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a"))))] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=a,b,a,b") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=a,b,a,b") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=a,b,a,b") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b"))))] + "TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3" + [(set (match_operand:V2SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:V2SQ + (fract_convert:V2SQ + (match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" 
"a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a")))))] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a")))))] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5"))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JA,JB,JB") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. 
+ +(define_insn "avgv2hi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3_real" + [(unspec [(match_operand:HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JAJB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_real" + [(unspec [(match_operand:SI 0 
"const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JAJB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] 
UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a"))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3_real" + [(unspec [(match_operand:DI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %K0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3_real" + [(unspec [(match_operand:DI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %K0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3_real" + [(unspec [(match_operand:V2SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3_real" + [(unspec [(match_operand:V4HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3_real" + [(unspec [(match_operand:V4HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:V2SQ + (fract_convert:V2SQ + 
(match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) diff --git a/gcc-4.9/gcc/config/c6x/c6x-mult.md.in b/gcc-4.9/gcc/config/c6x/c6x-mult.md.in new file mode 100644 index 000000000..f09c7c085 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-mult.md.in @@ -0,0 +1,421 @@ +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
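The header above is the only description of genmult.sh in this hunk; the script itself is not shown here. Comparing the template that follows with the already-expanded c6x-mult.md patterns earlier in the patch suggests a plain textual substitution: the normal variant turns each pattern into an ordinary (set ...) writing a register operand, while the "_real" variant wraps the same body in an UNSPEC_REAL_MULT whose destination is a const_int operand with JA/JB constraints, for use during modulo scheduling. The sketch below only illustrates that inferred mapping (mode placeholders such as _MV2HI and _MSQ are omitted); it is not the actual generator.

   #!/bin/sh
   # Sketch only -- substitutions inferred from the template/expansion pair
   # in this patch, not the real genmult.sh.  Mode placeholders not handled.

   # Normal variant: plain SETs writing register operands.
   sed -e 's/_VARIANT_//g'      -e 's/_SET_/set/g' \
       -e 's/_OBRK_//g'         -e 's/_CBRK_//g' \
       -e 's/_DESTOPERAND_/register_operand/g' \
       -e 's/_A_/a/g'  -e 's/_B_/b/g' \
       -e 's/_MODk_//g' -e 's/_MODK_//g' c6x-mult.md.in

   # "_real" variant: UNSPEC_REAL_MULT whose destination is a const_int
   # (JA/JB constraints), used before registers are assigned.
   sed -e 's/_VARIANT_/_real/g' -e 's/_SET_/unspec/g' \
       -e 's/_OBRK_/[/g'        -e 's/_CBRK_/] UNSPEC_REAL_MULT/g' \
       -e 's/_DESTOPERAND_/const_int_operand/g' \
       -e 's/_A_/JA/g' -e 's/_B_/JB/g' \
       -e 's/_MODk_/k/g' -e 's/_MODK_/K/g' c6x-mult.md.in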
+ +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5"))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_A_,_B_,_B_") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. + +(define_insn "avgv2hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV2HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV4QI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5"))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A__B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5"))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + 
(set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A__B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5"))_CBRK_)] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5")))_CBRK_)] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + 
(match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a"))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:DI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %_MODK_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:DI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %_MODK_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V2SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V4HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V4HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b")))_CBRK_)] + 
"TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV2SQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:V2SQ + (fract_convert:V2SQ + (match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))_CBRK_)] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))_CBRK_)] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) diff --git a/gcc-4.9/gcc/config/c6x/c6x-opts.h b/gcc-4.9/gcc/config/c6x/c6x-opts.h new file mode 100644 index 000000000..6bc3fe846 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-opts.h @@ -0,0 +1,35 @@ +/* Definitions for option handling for TI C6X. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef C6X_OPTS_H +#define C6X_OPTS_H + +/* An enumeration of all supported target devices. */ +typedef enum c6x_cpu_type +{ +#define C6X_ISA(NAME,ENUM_VALUE,FLAGS) \ + ENUM_VALUE, +#include "c6x-isas.def" +#undef C6X_ISA + unk_isa +} c6x_cpu_t; + +enum c6x_sdata { C6X_SDATA_NONE, C6X_SDATA_DEFAULT, C6X_SDATA_ALL }; + +#endif diff --git a/gcc-4.9/gcc/config/c6x/c6x-protos.h b/gcc-4.9/gcc/config/c6x/c6x-protos.h new file mode 100644 index 000000000..e360ebff8 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-protos.h @@ -0,0 +1,65 @@ +/* Prototypes for exported functions defined in c6x.c. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_C6X_PROTOS_H +#define GCC_C6X_PROTOS_H + +/* Functions defined in c6x.c. 
*/ + +#ifdef RTX_CODE +extern void c6x_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, int); +extern bool c6x_block_reg_pad_upward (enum machine_mode, const_tree, bool); + +extern bool c6x_legitimate_address_p_1 (enum machine_mode, rtx, bool, bool); +extern bool c6x_mem_operand (rtx, enum reg_class, bool); +extern bool expand_move (rtx *, enum machine_mode); + +extern bool c6x_long_call_p (rtx); +extern void c6x_expand_call (rtx, rtx, bool); +extern rtx c6x_expand_compare (rtx, enum machine_mode); +extern bool c6x_force_op_for_comparison_p (enum rtx_code, rtx); +extern bool c6x_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); + +extern rtx c6x_subword (rtx, bool); +extern void split_di (rtx *, int, rtx *, rtx *); +extern bool c6x_valid_mask_p (HOST_WIDE_INT); + +extern char c6x_get_unit_specifier (rtx); + +extern void c6x_final_prescan_insn(rtx insn, rtx *opvec, int noperands); + +extern int c6x_nsaved_regs (void); +extern HOST_WIDE_INT c6x_initial_elimination_offset (int, int); +extern void c6x_expand_prologue (void); +extern void c6x_expand_epilogue (bool); + +extern rtx c6x_return_addr_rtx (int); + +extern void c6x_set_return_address (rtx, rtx); +#endif + +extern void c6x_override_options (void); +extern void c6x_optimization_options (int, int); + +extern void c6x_output_file_unwind (FILE *); + +extern void c6x_function_end (FILE *, const char *); + +#endif /* GCC_C6X_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/c6x/c6x-sched.md b/gcc-4.9/gcc/config/c6x/c6x-sched.md new file mode 100644 index 000000000..d85c1a9b9 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-sched.md @@ -0,0 +1,934 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically from c6x-sched.md.in by gensched.sh + +;; Definitions for side 1, cross n + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 1 with either 1 or 2 for each of the sides of the +;; machine, and a correspondingly with "a" or "b". n and +;; are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d1n" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1") + +(define_insn_reservation "store_d1n" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1") + +(define_insn_reservation "loadn_d1n" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "storen_d1n" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "single_d1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "a")))) + "d1") + +(define_insn_reservation "single_l1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w") + +(define_insn_reservation "fp4_l1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1,nothing*2,l1w") + +(define_insn_reservation "intdp_l1n" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1,nothing*2,l1w*2") + +(define_insn_reservation "adddp_l1n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "(l1)*2,nothing*3,l1w*2") + +(define_insn_reservation "branch_s1n" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+br1") + +(define_insn_reservation "call_addkpc_s1n" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s1n" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+br1,s2,s2") + +(define_insn_reservation "single_s1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)") + +(define_insn_reservation "cmpdp_s1n" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1,(s1)+s1w") + +(define_insn_reservation "dp2_s1n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+s1w,s1w") + +(define_insn_reservation "fp4_s1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1,nothing*2,s1w") + +(define_insn_reservation "mvilc4_s1n" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)") + +(define_insn_reservation "single_dl1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w))") + +(define_insn_reservation "single_ds1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") 
+ (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "a")))) + "(d1|(s1+s1w))") + +(define_insn_reservation "single_ls1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "((l1+l1w)|(s1+s1w))") + +(define_insn_reservation "dp2_l1n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w,l1w") + +(define_insn_reservation "fp4_ls1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(fps1+s1,nothing*2,s1w)|(fpl1+l1,nothing*2,l1w)") + +(define_insn_reservation "adddp_ls1n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(adddps1+(s1)*2,nothing*3,s1w*2)|(adddpl1+(l1)*2,nothing*3,l1w*2)") + +(define_insn_reservation "single_dls1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w)|(s1+s1w))") + +(define_insn_reservation "mpy2_m1n" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,m1w") + +(define_insn_reservation "mpy4_m1n" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,nothing,nothing,m1w") + +(define_insn_reservation "mpydp_m1n" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1)*4,nothing*4,m1w*2") + +(define_insn_reservation "mpyspdp_m1n" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1)*2,nothing*3,m1w*2") + +(define_insn_reservation "mpysp2dp_m1n" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,nothing*2,m1w*2") + +;; Definitions for side 2, cross n + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 2 with either 1 or 2 for each of the sides of the +;; machine, and b correspondingly with "a" or "b". n and +;; are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d2n" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t2") + +(define_insn_reservation "store_d2n" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t2") + +(define_insn_reservation "loadn_d2n" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "storen_d2n" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "single_d2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "b")))) + "d2") + +(define_insn_reservation "single_l2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w") + +(define_insn_reservation "fp4_l2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2,nothing*2,l2w") + +(define_insn_reservation "intdp_l2n" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2,nothing*2,l2w*2") + +(define_insn_reservation "adddp_l2n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "(l2)*2,nothing*3,l2w*2") + +(define_insn_reservation "branch_s2n" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+br1") + +(define_insn_reservation "call_addkpc_s2n" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s2n" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+br1,s2,s2") + +(define_insn_reservation "single_s2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)") + +(define_insn_reservation "cmpdp_s2n" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2,(s2)+s2w") + +(define_insn_reservation "dp2_s2n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+s2w,s2w") + +(define_insn_reservation "fp4_s2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2,nothing*2,s2w") + +(define_insn_reservation "mvilc4_s2n" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)") + +(define_insn_reservation "single_dl2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w))") + +(define_insn_reservation "single_ds2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") 
+ (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "b")))) + "(d2|(s2+s2w))") + +(define_insn_reservation "single_ls2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "((l2+l2w)|(s2+s2w))") + +(define_insn_reservation "dp2_l2n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w,l2w") + +(define_insn_reservation "fp4_ls2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(fps2+s2,nothing*2,s2w)|(fpl2+l2,nothing*2,l2w)") + +(define_insn_reservation "adddp_ls2n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(adddps2+(s2)*2,nothing*3,s2w*2)|(adddpl2+(l2)*2,nothing*3,l2w*2)") + +(define_insn_reservation "single_dls2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w)|(s2+s2w))") + +(define_insn_reservation "mpy2_m2n" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,m2w") + +(define_insn_reservation "mpy4_m2n" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,nothing,nothing,m2w") + +(define_insn_reservation "mpydp_m2n" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2)*4,nothing*4,m2w*2") + +(define_insn_reservation "mpyspdp_m2n" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2)*2,nothing*3,m2w*2") + +(define_insn_reservation "mpysp2dp_m2n" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,nothing*2,m2w*2") + +;; Definitions for side 1, cross y + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 1 with either 1 or 2 for each of the sides of the +;; machine, and a correspondingly with "a" or "b". y and +;; +x1 are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d1y" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t2") + +(define_insn_reservation "store_d1y" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t2") + +(define_insn_reservation "loadn_d1y" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "storen_d1y" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "single_d1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "a")))) + "d1+x1") + +(define_insn_reservation "single_l1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w+x1") + +(define_insn_reservation "fp4_l1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+x1,nothing*2,l1w") + +(define_insn_reservation "intdp_l1y" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+x1,nothing*2,l1w*2") + +(define_insn_reservation "adddp_l1y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "(l1+x1)*2,nothing*3,l1w*2") + +(define_insn_reservation "branch_s1y" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1+br1") + +(define_insn_reservation "call_addkpc_s1y" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+x1+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s1y" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+x1+br1,s2,s2") + +(define_insn_reservation "single_s1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1") + +(define_insn_reservation "cmpdp_s1y" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+x1,(s1+x1)+s1w") + +(define_insn_reservation "dp2_s1y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+s1w+x1,s1w") + +(define_insn_reservation "fp4_s1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+x1,nothing*2,s1w") + +(define_insn_reservation "mvilc4_s1y" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1") + +(define_insn_reservation "single_dl1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w))+x1") + +(define_insn_reservation "single_ds1y" 1 + (and (eq_attr 
"type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "a")))) + "(d1|(s1+s1w))+x1") + +(define_insn_reservation "single_ls1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "((l1+l1w)|(s1+s1w))+x1") + +(define_insn_reservation "dp2_l1y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w+x1,l1w") + +(define_insn_reservation "fp4_ls1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(fps1+s1+x1,nothing*2,s1w)|(fpl1+l1+x1,nothing*2,l1w)") + +(define_insn_reservation "adddp_ls1y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(adddps1+(s1+x1)*2,nothing*3,s1w*2)|(adddpl1+(l1+x1)*2,nothing*3,l1w*2)") + +(define_insn_reservation "single_dls1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w)|(s1+s1w))+x1") + +(define_insn_reservation "mpy2_m1y" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,m1w") + +(define_insn_reservation "mpy4_m1y" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,nothing,nothing,m1w") + +(define_insn_reservation "mpydp_m1y" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1+x1)*4,nothing*4,m1w*2") + +(define_insn_reservation "mpyspdp_m1y" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1+x1)*2,nothing*3,m1w*2") + +(define_insn_reservation "mpysp2dp_m1y" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,nothing*2,m1w*2") + +;; Definitions for side 2, cross y + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 2 with either 1 or 2 for each of the sides of the +;; machine, and b correspondingly with "a" or "b". y and +;; +x2 are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d2y" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1") + +(define_insn_reservation "store_d2y" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1") + +(define_insn_reservation "loadn_d2y" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "storen_d2y" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "single_d2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "b")))) + "d2+x2") + +(define_insn_reservation "single_l2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w+x2") + +(define_insn_reservation "fp4_l2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+x2,nothing*2,l2w") + +(define_insn_reservation "intdp_l2y" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+x2,nothing*2,l2w*2") + +(define_insn_reservation "adddp_l2y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "(l2+x2)*2,nothing*3,l2w*2") + +(define_insn_reservation "branch_s2y" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2+br1") + +(define_insn_reservation "call_addkpc_s2y" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+x2+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s2y" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+x2+br1,s2,s2") + +(define_insn_reservation "single_s2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2") + +(define_insn_reservation "cmpdp_s2y" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+x2,(s2+x2)+s2w") + +(define_insn_reservation "dp2_s2y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+s2w+x2,s2w") + +(define_insn_reservation "fp4_s2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+x2,nothing*2,s2w") + +(define_insn_reservation "mvilc4_s2y" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2") + +(define_insn_reservation "single_dl2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w))+x2") + +(define_insn_reservation "single_ds2y" 1 + (and (eq_attr 
"type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "b")))) + "(d2|(s2+s2w))+x2") + +(define_insn_reservation "single_ls2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "((l2+l2w)|(s2+s2w))+x2") + +(define_insn_reservation "dp2_l2y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w+x2,l2w") + +(define_insn_reservation "fp4_ls2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(fps2+s2+x2,nothing*2,s2w)|(fpl2+l2+x2,nothing*2,l2w)") + +(define_insn_reservation "adddp_ls2y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(adddps2+(s2+x2)*2,nothing*3,s2w*2)|(adddpl2+(l2+x2)*2,nothing*3,l2w*2)") + +(define_insn_reservation "single_dls2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w)|(s2+s2w))+x2") + +(define_insn_reservation "mpy2_m2y" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,m2w") + +(define_insn_reservation "mpy4_m2y" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,nothing,nothing,m2w") + +(define_insn_reservation "mpydp_m2y" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2+x2)*4,nothing*4,m2w*2") + +(define_insn_reservation "mpyspdp_m2y" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2+x2)*2,nothing*3,m2w*2") + +(define_insn_reservation "mpysp2dp_m2y" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,nothing*2,m2w*2") diff --git a/gcc-4.9/gcc/config/c6x/c6x-sched.md.in b/gcc-4.9/gcc/config/c6x/c6x-sched.md.in new file mode 100644 index 000000000..2a98dddac --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-sched.md.in @@ -0,0 +1,230 @@ +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing _N_ with either 1 or 2 for each of the sides of the +;; machine, and _RF_ correspondingly with "a" or "b". _CROSS_ and +;; _CUNIT_ are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d_N__CROSS_" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t_NX_") + +(define_insn_reservation "store_d_N__CROSS_" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t_NX_") + +(define_insn_reservation "loadn_d_N__CROSS_" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t1+t2") + +(define_insn_reservation "storen_d_N__CROSS_" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t1+t2") + +(define_insn_reservation "single_d_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "_RF_")))) + "d_N__CUNIT_") + +(define_insn_reservation "single_l_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N_+l_N_w_CUNIT_") + +(define_insn_reservation "fp4_l_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N__CUNIT_,nothing*2,l_N_w") + +(define_insn_reservation "intdp_l_N__CROSS_" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N__CUNIT_,nothing*2,l_N_w*2") + +(define_insn_reservation "adddp_l_N__CROSS_" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "(l_N__CUNIT_)*2,nothing*3,l_N_w*2") + +(define_insn_reservation "branch_s_N__CROSS_" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_+br1") + +(define_insn_reservation "call_addkpc_s_N__CROSS_" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_"))))) + "(s_N_+s_N_w)_CUNIT_+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s_N__CROSS_" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_"))))) + "(s_N_+s_N_w)_CUNIT_+br1,s2,s2") + +(define_insn_reservation "single_s_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_") + +(define_insn_reservation "cmpdp_s_N__CROSS_" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N__CUNIT_,(s_N__CUNIT_)+s_N_w") + +(define_insn_reservation "dp2_s_N__CROSS_" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N_+s_N_w_CUNIT_,s_N_w") + +(define_insn_reservation "fp4_s_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N__CUNIT_,nothing*2,s_N_w") + +(define_insn_reservation "mvilc4_s_N__CROSS_" 
4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_") + +(define_insn_reservation "single_dl_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(l_N_+l_N_w))_CUNIT_") + +(define_insn_reservation "single_ds_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "single_ls_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "((l_N_+l_N_w)|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "dp2_l_N__CROSS_" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N_+l_N_w_CUNIT_,l_N_w") + +(define_insn_reservation "fp4_ls_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "(fps_N_+s_N__CUNIT_,nothing*2,s_N_w)|(fpl_N_+l_N__CUNIT_,nothing*2,l_N_w)") + +(define_insn_reservation "adddp_ls_N__CROSS_" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "(adddps_N_+(s_N__CUNIT_)*2,nothing*3,s_N_w*2)|(adddpl_N_+(l_N__CUNIT_)*2,nothing*3,l_N_w*2)") + +(define_insn_reservation "single_dls_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(l_N_+l_N_w)|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "mpy2_m_N__CROSS_" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,m_N_w") + +(define_insn_reservation "mpy4_m_N__CROSS_" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,nothing,nothing,m_N_w") + +(define_insn_reservation "mpydp_m_N__CROSS_" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "(m_N__CUNIT_)*4,nothing*4,m_N_w*2") + +(define_insn_reservation "mpyspdp_m_N__CROSS_" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "(m_N__CUNIT_)*2,nothing*3,m_N_w*2") + +(define_insn_reservation "mpysp2dp_m_N__CROSS_" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,nothing*2,m_N_w*2") diff --git a/gcc-4.9/gcc/config/c6x/c6x-tables.opt b/gcc-4.9/gcc/config/c6x/c6x-tables.opt new file mode 100644 index 000000000..a4eb62fab --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-tables.opt @@ -0,0 +1,43 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from c6x-isas.def. +; +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. 
+; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +Enum +Name(c6x_isa) Type(int) +Known C6X ISAs (for use with the -march= option): + +EnumValue +Enum(c6x_isa) String(c62x) Value(0) + +EnumValue +Enum(c6x_isa) String(c64x) Value(1) + +EnumValue +Enum(c6x_isa) String(c64x+) Value(2) + +EnumValue +Enum(c6x_isa) String(c67x) Value(3) + +EnumValue +Enum(c6x_isa) String(c67x+) Value(4) + +EnumValue +Enum(c6x_isa) String(c674x) Value(5) + diff --git a/gcc-4.9/gcc/config/c6x/c6x.c b/gcc-4.9/gcc/config/c6x/c6x.c new file mode 100644 index 000000000..9ba10df73 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.c @@ -0,0 +1,6846 @@ +/* Target Code for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "expr.h" +#include "regs.h" +#include "optabs.h" +#include "recog.h" +#include "ggc.h" +#include "sched-int.h" +#include "timevar.h" +#include "tm_p.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "df.h" +#include "function.h" +#include "diagnostic-core.h" +#include "cgraph.h" +#include "langhooks.h" +#include "target.h" +#include "target-def.h" +#include "sel-sched.h" +#include "debug.h" +#include "opts.h" +#include "hw-doloop.h" +#include "regrename.h" +#include "dumpfile.h" +#include "gimple-expr.h" + +/* Table of supported architecture variants. */ +typedef struct +{ + const char *arch; + enum c6x_cpu_type type; + unsigned short features; +} c6x_arch_table; + +/* A list of all ISAs, mapping each one to a representative device. + Used for -march selection. */ +static const c6x_arch_table all_isas[] = +{ +#define C6X_ISA(NAME,DEVICE,FLAGS) \ + { NAME, DEVICE, FLAGS }, +#include "c6x-isas.def" +#undef C6X_ISA + { NULL, C6X_CPU_C62X, 0 } +}; + +/* This is the parsed result of the "-march=" option, if given. */ +enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH; + +/* A mask of insn types that are allowed by the architecture selected by + the -march option. */ +unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK; + +/* The instruction that is being output (as obtained from FINAL_PRESCAN_INSN). + */ +static rtx c6x_current_insn = NULL_RTX; + +/* A decl we build to access __c6xabi_DSBT_base. */ +static GTY(()) tree dsbt_decl; + +/* Determines whether we run our final scheduling pass or not. 
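For illustration, the all_isas[] table above is built with an X-macro: c6x-isas.def is included once with C6X_ISA defined as a struct initializer, so every C6X_ISA(...) line in the .def file becomes one table entry. Below is a minimal, self-contained sketch of the same idiom; the ISA names and flag values are invented for the sketch, the real list lives in c6x-isas.def.

#include <stdio.h>

enum cpu_type { CPU_C62X, CPU_C64X, CPU_C67X };

typedef struct
{
  const char *arch;
  enum cpu_type type;
  unsigned short features;
} arch_table;

/* Stand-in for '#include "c6x-isas.def"': each ISA(...) entry expands to one
   initializer when the macro is defined that way.  */
#define ISA_LIST \
  ISA ("c62x", CPU_C62X, 0x1) \
  ISA ("c64x", CPU_C64X, 0x3) \
  ISA ("c67x", CPU_C67X, 0x5)

static const arch_table all_isas_sketch[] =
{
#define ISA(NAME, DEVICE, FLAGS) { NAME, DEVICE, FLAGS },
  ISA_LIST
#undef ISA
  { NULL, CPU_C62X, 0 }
};

int main (void)
{
  for (int i = 0; all_isas_sketch[i].arch; i++)
    printf ("%-6s type=%d features=%#x\n", all_isas_sketch[i].arch,
            (int) all_isas_sketch[i].type, all_isas_sketch[i].features);
  return 0;
}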
We always + avoid the normal second scheduling pass. */ +static int c6x_flag_schedule_insns2; + +/* Determines whether we run variable tracking in machine dependent + reorganization. */ +static int c6x_flag_var_tracking; + +/* Determines whether we use modulo scheduling. */ +static int c6x_flag_modulo_sched; + +/* Record the state of flag_pic before we set it to 1 for DSBT. */ +int c6x_initial_flag_pic; + +typedef struct +{ + /* We record the clock cycle for every insn during scheduling. */ + int clock; + /* After scheduling, we run assign_reservations to choose unit + reservations for all insns. These are recorded here. */ + int reservation; + /* Records the new condition for insns which must be made + conditional after scheduling. An entry of NULL_RTX means no such + change is necessary. */ + rtx new_cond; + /* True for the first insn that was scheduled in an ebb. */ + bool ebb_start; + /* The scheduler state after the insn, transformed into a mask of UNIT_QID + bits rather than storing the state. Meaningful only for the last + insn in a cycle. */ + unsigned int unit_mask; +} c6x_sched_insn_info; + + +/* Record a c6x_sched_insn_info structure for every insn in the function. */ +static vec insn_info; + +#define INSN_INFO_LENGTH (insn_info).length () +#define INSN_INFO_ENTRY(N) (insn_info[(N)]) + +static bool done_cfi_sections; + +#define RESERVATION_FLAG_D 1 +#define RESERVATION_FLAG_L 2 +#define RESERVATION_FLAG_S 4 +#define RESERVATION_FLAG_M 8 +#define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L) +#define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S) +#define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S) +#define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS) + +/* The DFA names of the units. */ +static const char *const c6x_unit_names[] = +{ + "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1", + "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2" +}; + +/* The DFA unit number for each unit in c6x_unit_names[]. */ +static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)]; + +/* Unit query IDs. */ +#define UNIT_QID_D1 0 +#define UNIT_QID_L1 1 +#define UNIT_QID_S1 2 +#define UNIT_QID_M1 3 +#define UNIT_QID_FPS1 4 +#define UNIT_QID_FPL1 5 +#define UNIT_QID_ADDDPS1 6 +#define UNIT_QID_ADDDPL1 7 +#define UNIT_QID_SIDE_OFFSET 8 + +#define RESERVATION_S1 2 +#define RESERVATION_S2 10 + +/* An enum for the unit requirements we count in the UNIT_REQS table. */ +enum unitreqs +{ + UNIT_REQ_D, + UNIT_REQ_L, + UNIT_REQ_S, + UNIT_REQ_M, + UNIT_REQ_DL, + UNIT_REQ_DS, + UNIT_REQ_LS, + UNIT_REQ_DLS, + UNIT_REQ_T, + UNIT_REQ_X, + UNIT_REQ_MAX +}; + +/* A table used to count unit requirements. Used when computing minimum + iteration intervals. */ +typedef int unit_req_table[2][UNIT_REQ_MAX]; +static unit_req_table unit_reqs; + +/* Register map for debugging. */ +unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* A0 - A15. */ + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, /* A16 - A32. */ + 50, 51, 52, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* B0 - B15. */ + 29, 30, 31, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, /* B16 - B32. */ + 66, 67, 68, + -1, -1, -1 /* FP, ARGP, ILC. */ +}; + +/* Allocate a new, cleared machine_function structure. */ + +static struct machine_function * +c6x_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Implement TARGET_OPTION_OVERRIDE. 
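The unit_mask recorded per cycle above is a plain bitmask over the UNIT_QID_* values, with side-B units offset by UNIT_QID_SIDE_OFFSET. A small standalone sketch of how such a mask can be composed and queried; only the QID layout mirrors the #defines above, the helper names are invented for the sketch.

#include <stdio.h>

#define QID_D 0
#define QID_L 1
#define QID_S 2
#define QID_M 3
#define QID_SIDE_OFFSET 8   /* side-B units use query IDs 8 positions later */

static unsigned int mark_unit_busy (unsigned int mask, int qid, int side_b)
{
  return mask | (1u << (qid + (side_b ? QID_SIDE_OFFSET : 0)));
}

static int unit_busy_p (unsigned int mask, int qid, int side_b)
{
  return (mask >> (qid + (side_b ? QID_SIDE_OFFSET : 0))) & 1;
}

int main (void)
{
  unsigned int mask = 0;
  mask = mark_unit_busy (mask, QID_D, 0);   /* .D1 used this cycle */
  mask = mark_unit_busy (mask, QID_M, 1);   /* .M2 used this cycle */
  printf ("D1 busy: %d, S2 busy: %d, M2 busy: %d\n",
          unit_busy_p (mask, QID_D, 0),
          unit_busy_p (mask, QID_S, 1),
          unit_busy_p (mask, QID_M, 1));
  return 0;
}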
*/ + +static void +c6x_option_override (void) +{ + unsigned i; + + if (global_options_set.x_c6x_arch_option) + { + c6x_arch = all_isas[c6x_arch_option].type; + c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS; + c6x_insn_mask |= all_isas[c6x_arch_option].features; + } + + c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload; + flag_schedule_insns_after_reload = 0; + + c6x_flag_modulo_sched = flag_modulo_sched; + flag_modulo_sched = 0; + + init_machine_status = c6x_init_machine_status; + + for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++) + c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]); + + if (flag_pic && !TARGET_DSBT) + { + error ("-fpic and -fPIC not supported without -mdsbt on this target"); + flag_pic = 0; + } + c6x_initial_flag_pic = flag_pic; + if (TARGET_DSBT && !flag_pic) + flag_pic = 1; +} + + +/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook. */ + +static void +c6x_conditional_register_usage (void) +{ + int i; + if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X) + for (i = 16; i < 32; i++) + { + fixed_regs[i] = 1; + fixed_regs[32 + i] = 1; + } + if (TARGET_INSNS_64) + { + SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS], + REG_A0); + SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS], + REG_A0); + CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS], + REG_A0); + CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS], + REG_A0); + } +} + +static GTY(()) rtx eqdf_libfunc; +static GTY(()) rtx nedf_libfunc; +static GTY(()) rtx ledf_libfunc; +static GTY(()) rtx ltdf_libfunc; +static GTY(()) rtx gedf_libfunc; +static GTY(()) rtx gtdf_libfunc; +static GTY(()) rtx eqsf_libfunc; +static GTY(()) rtx nesf_libfunc; +static GTY(()) rtx lesf_libfunc; +static GTY(()) rtx ltsf_libfunc; +static GTY(()) rtx gesf_libfunc; +static GTY(()) rtx gtsf_libfunc; +static GTY(()) rtx strasgi_libfunc; +static GTY(()) rtx strasgi64p_libfunc; + +/* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library + functions to match the C6x ABI. */ + +static void +c6x_init_libfuncs (void) +{ + /* Double-precision floating-point arithmetic. */ + set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd"); + set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd"); + set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd"); + set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd"); + set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd"); + + /* Single-precision floating-point arithmetic. */ + set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf"); + set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf"); + set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf"); + set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf"); + set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf"); + + /* Floating-point comparisons. 
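The renaming done here (and continued below for comparisons, conversions and integer division) means that ordinary C operations with no matching hardware instruction lower to __c6xabi_* helper calls. A hedged illustration of the expected lowering for a few source-level operations; the exact set of calls depends on the selected -march and on whether the device has hardware floating point.

/* Illustration only, not part of the patch.  */
double add_d (double a, double b)
{
  return a + b;               /* expected to call __c6xabi_addd (add_optab, DFmode)
                                 on devices without a hardware double add */
}

unsigned long long div_ull (unsigned long long a, unsigned long long b)
{
  return a / b;               /* expected to call __c6xabi_divull (udiv_optab, DImode) */
}

int unordered_f (float a, float b)
{
  return __builtin_isunordered (a, b);   /* expected to use __c6xabi_unordf */
}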
+   */
+  eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
+  nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
+  lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
+  ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
+  gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
+  gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
+  eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
+  nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
+  ledf_libfunc = init_one_libfunc ("__c6xabi_led");
+  ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
+  gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
+  gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");
+
+  set_optab_libfunc (eq_optab, SFmode, NULL);
+  set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
+  set_optab_libfunc (gt_optab, SFmode, NULL);
+  set_optab_libfunc (ge_optab, SFmode, NULL);
+  set_optab_libfunc (lt_optab, SFmode, NULL);
+  set_optab_libfunc (le_optab, SFmode, NULL);
+  set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
+  set_optab_libfunc (eq_optab, DFmode, NULL);
+  set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
+  set_optab_libfunc (gt_optab, DFmode, NULL);
+  set_optab_libfunc (ge_optab, DFmode, NULL);
+  set_optab_libfunc (lt_optab, DFmode, NULL);
+  set_optab_libfunc (le_optab, DFmode, NULL);
+  set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");
+
+  /* Floating-point to integer conversions.  */
+  set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
+  set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
+  set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
+  set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
+  set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
+  set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
+  set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
+  set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");
+
+  /* Conversions between floating types.  */
+  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
+  set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");
+
+  /* Integer to floating-point conversions.  */
+  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
+  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
+  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
+  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
+  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
+  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
+  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
+  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");
+
+  /* Long long.
*/ + set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll"); + set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl"); + set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru"); + set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr"); + + set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi"); + set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu"); + set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi"); + set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu"); + set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi"); + set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu"); + set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli"); + set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull"); + set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli"); + set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull"); + set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull"); + + /* Block move. */ + strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi"); + strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus"); +} + +/* Begin the assembly file. */ + +static void +c6x_file_start (void) +{ + /* Variable tracking should be run after all optimizations which change order + of insns. It also needs a valid CFG. This can't be done in + c6x_override_options, because flag_var_tracking is finalized after + that. */ + c6x_flag_var_tracking = flag_var_tracking; + flag_var_tracking = 0; + + done_cfi_sections = false; + default_file_start (); + + /* Arrays are aligned to 8-byte boundaries. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n"); + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n"); + + /* Stack alignment is 8 bytes. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n"); + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n"); + +#if 0 /* FIXME: Reenable when TI's tools are fixed. */ + /* ??? Ideally we'd check flag_short_wchar somehow. */ + asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2); +#endif + + /* We conform to version 1.0 of the ABI. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n"); + +} + +/* The LTO frontend only enables exceptions when it sees a function that + uses it. This changes the return value of dwarf2out_do_frame, so we + have to check before every function. */ + +void +c6x_output_file_unwind (FILE * f) +{ + if (done_cfi_sections) + return; + + /* Output a .cfi_sections directive. */ + if (dwarf2out_do_frame ()) + { + if (flag_unwind_tables || flag_exceptions) + { + if (write_symbols == DWARF2_DEBUG + || write_symbols == VMS_AND_DWARF2_DEBUG) + asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n"); + else + asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n"); + } + else + asm_fprintf (f, "\t.cfi_sections .debug_frame\n"); + done_cfi_sections = true; + } +} + +/* Output unwind directives at the end of a function. */ + +static void +c6x_output_fn_unwind (FILE * f) +{ + /* Return immediately if we are not generating unwinding tables. */ + if (! (flag_unwind_tables || flag_exceptions)) + return; + + /* If this function will never be unwound, then mark it as such. */ + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + fputs("\t.cantunwind\n", f); + + fputs ("\t.endp\n", f); +} + + +/* Stack and Calling. 
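Taken together, c6x_file_start and c6x_output_file_unwind emit an assembly-file prologue along these lines. This is a sketch assembled from the asm_fprintf strings above; the .cfi_sections choice depends on the unwinding and debug flags in effect.

        .c6xabi_attribute Tag_ABI_array_object_alignment, 0
        .c6xabi_attribute Tag_ABI_array_object_align_expected, 0
        .c6xabi_attribute Tag_ABI_stack_align_needed, 0
        .c6xabi_attribute Tag_ABI_stack_align_preserved, 0
        .c6xabi_attribute Tag_ABI_conformance, "1.0"
        .cfi_sections .debug_frame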
*/ + +int argument_registers[10] = +{ + REG_A4, REG_B4, + REG_A6, REG_B6, + REG_A8, REG_B8, + REG_A10, REG_B10, + REG_A12, REG_B12 +}; + +/* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h. */ + +void +c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname, + int n_named_args ATTRIBUTE_UNUSED) +{ + cum->count = 0; + cum->nregs = 10; + if (!libname && fntype) + { + /* We need to find out the number of named arguments. Unfortunately, + for incoming arguments, N_NAMED_ARGS is set to -1. */ + if (stdarg_p (fntype)) + cum->nregs = type_num_arguments (fntype) - 1; + if (cum->nregs > 10) + cum->nregs = 10; + } +} + +/* Implements the macro FUNCTION_ARG defined in c6x.h. */ + +static rtx +c6x_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (cum->count >= cum->nregs) + return NULL_RTX; + if (type) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type)) + { + if (size > 4) + { + rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1); + rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]); + rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4))); + return gen_rtx_PARALLEL (mode, vec); + } + } + } + return gen_rtx_REG (mode, argument_registers[cum->count]); +} + +static void +c6x_function_arg_advance (cumulative_args_t cum_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + cum->count++; +} + + +/* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return + upward rather than downward. */ + +bool +c6x_block_reg_pad_upward (enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool first) +{ + HOST_WIDE_INT size; + + if (!TARGET_BIG_ENDIAN) + return true; + if (!first) + return true; + if (!type) + return true; + size = int_size_in_bytes (type); + return size == 3; +} + +/* Implement TARGET_FUNCTION_ARG_BOUNDARY. */ + +static unsigned int +c6x_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode); + + if (boundary > BITS_PER_WORD) + return 2 * BITS_PER_WORD; + + if (mode == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size > 4) + return 2 * BITS_PER_WORD; + if (boundary < BITS_PER_WORD) + { + if (size >= 3) + return BITS_PER_WORD; + if (size >= 2) + return 2 * BITS_PER_UNIT; + } + } + return boundary; +} + +/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY. */ +static unsigned int +c6x_function_arg_round_boundary (enum machine_mode mode, const_tree type) +{ + return c6x_function_arg_boundary (mode, type); +} + +/* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place + where function FUNC returns or receives a value of data type TYPE. */ + +static rtx +c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + /* Functions return values in register A4. When returning aggregates, we may + have to adjust for endianness. 
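For illustration, here is how the first few scalar arguments of a call map onto the argument_registers[] table above. Aggregates, varargs and the big-endian register-pair case follow the extra rules in c6x_function_arg; this sketch is not part of the patch.

int f (int a,    /* A4 */
       int b,    /* B4 */
       int c,    /* A6 */
       int d,    /* B6 */
       int e)    /* A8; further arguments continue with B8, A10, B10, A12,
                    B12, then go to the stack once the ten registers are
                    used (cum->count >= cum->nregs) */
{
  return a + b + c + d + e;
}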
*/ + if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size > 4) + { + + rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1); + rtx reg2 = gen_rtx_REG (SImode, REG_A4); + rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4))); + return gen_rtx_PARALLEL (TYPE_MODE (type), vec); + } + } + return gen_rtx_REG (TYPE_MODE (type), REG_A4); +} + +/* Implement TARGET_LIBCALL_VALUE. */ + +static rtx +c6x_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, REG_A4); +} + +/* TARGET_STRUCT_VALUE_RTX implementation. */ + +static rtx +c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, REG_A3); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +c6x_function_value_regno_p (const unsigned int regno) +{ + return regno == REG_A4; +} + +/* Types larger than 64 bit, and variable sized types, are passed by + reference. The callee must copy them; see c6x_callee_copies. */ + +static bool +c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + int size = -1; + if (type) + size = int_size_in_bytes (type); + else if (mode != VOIDmode) + size = GET_MODE_SIZE (mode); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Decide whether a type should be returned in memory (true) + or in a register (false). This is called by the macro + TARGET_RETURN_IN_MEMORY. */ + +static bool +c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + int size = int_size_in_bytes (type); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Values which must be returned in the most-significant end of the return + register. */ + +static bool +c6x_return_in_msb (const_tree valtype) +{ + HOST_WIDE_INT size = int_size_in_bytes (valtype); + return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3; +} + +/* Implement TARGET_CALLEE_COPIES. */ + +static bool +c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + return true; +} + +/* Return the type to use as __builtin_va_list. */ +static tree +c6x_build_builtin_va_list (void) +{ + return build_pointer_type (char_type_node); +} + +static void +c6x_asm_trampoline_template (FILE *f) +{ + fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */ + fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */ + fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */ + fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */ + fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */ + fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */ + fprintf (f, "\t.long\t0x00000000\n"); /* nop */ + fprintf (f, "\t.long\t0x00000000\n"); /* nop */ +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + TRAMP. FNADDR is an RTX for the address of the function's pure + code. CXT is an RTX for the static chain value for the function. 
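A short illustration of the return-value rules above, assuming UNITS_PER_WORD is 4 on this target: values up to 8 bytes come back in registers (A4, or an A5:A4 pair), anything larger is returned in memory with the hidden result pointer in A3 (c6x_struct_value_rtx).

/* Illustration only, not part of the patch.  */
struct small { int a, b; };       /* 8 bytes: returned in registers          */
struct large { int a, b, c; };    /* 12 bytes: c6x_return_in_memory is true  */

struct small ret_small (void) { struct small s = { 1, 2 };    return s; }
struct large ret_large (void) { struct large l = { 1, 2, 3 }; return l; }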
*/ + +static void +c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx t1 = copy_to_reg (fnaddr); + rtx t2 = copy_to_reg (cxt); + rtx mask = gen_reg_rtx (SImode); + int i; + + emit_block_move (tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + emit_move_insn (mask, GEN_INT (0xffff << 7)); + + for (i = 0; i < 4; i++) + { + rtx mem = adjust_address (tramp, SImode, i * 4); + rtx t = (i & 1) ? t2 : t1; + rtx v1 = gen_reg_rtx (SImode); + rtx v2 = gen_reg_rtx (SImode); + emit_move_insn (v1, mem); + if (i < 2) + emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7))); + else + emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9))); + emit_insn (gen_andsi3 (v2, v2, mask)); + emit_insn (gen_iorsi3 (v2, v2, v1)); + emit_move_insn (mem, v2); + } +#ifdef CLEAR_INSN_CACHE + tramp = XEXP (tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"), + LCT_NORMAL, VOIDmode, 2, tramp, Pmode, + plus_constant (Pmode, tramp, TRAMPOLINE_SIZE), + Pmode); +#endif +} + +/* Determine whether c6x_output_mi_thunk can succeed. */ + +static bool +c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta ATTRIBUTE_UNUSED, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + const_tree function ATTRIBUTE_UNUSED) +{ + return !TARGET_LONG_CALLS; +} + +/* Output the assembler code for a thunk function. THUNK is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[5]; + /* The this parameter is passed as the first argument. */ + rtx this_rtx = gen_rtx_REG (Pmode, REG_A4); + + c6x_current_insn = NULL_RTX; + + xops[4] = XEXP (DECL_RTL (function), 0); + if (!vcall_offset) + { + output_asm_insn ("b .s2 \t%4", xops); + if (!delta) + output_asm_insn ("nop 5", xops); + } + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[0] = GEN_INT (delta); + xops[1] = this_rtx; + if (delta >= -16 && delta <= 15) + { + output_asm_insn ("add .s1 %0, %1, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 4", xops); + } + else if (delta >= 16 && delta < 32) + { + output_asm_insn ("add .d1 %0, %1, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 4", xops); + } + else if (delta >= -32768 && delta < 32768) + { + output_asm_insn ("mvk .s1 %0, A0", xops); + output_asm_insn ("add .d1 %1, A0, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 3", xops); + } + else + { + output_asm_insn ("mvkl .s1 %0, A0", xops); + output_asm_insn ("mvkh .s1 %0, A0", xops); + output_asm_insn ("add .d1 %1, A0, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 3", xops); + } + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx a0tmp = gen_rtx_REG (Pmode, REG_A0); + rtx a3tmp = gen_rtx_REG (Pmode, REG_A3); + + xops[1] = a3tmp; + xops[2] = a0tmp; + xops[3] = gen_rtx_MEM (Pmode, a0tmp); + output_asm_insn ("mv .s1 a4, %2", xops); + output_asm_insn ("ldw .d1t1 %3, %2", xops); + + /* Adjust the this parameter. 
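The patching loop above drops each 16-bit half of the function address and static chain into the constant field (bits 7..22) of the mvkl/mvkh template words: the low half is shifted left by 7, the high half right by 9, then masked in. A self-contained sketch of that bit manipulation, using the template words from c6x_asm_trampoline_template; it is an illustration, not GCC code.

#include <stdint.h>
#include <stdio.h>

static uint32_t patch_field (uint32_t insn, uint32_t value, int high_half)
{
  uint32_t mask = 0xffffu << 7;                    /* constant field, bits 7..22 */
  uint32_t field = high_half ? (value >> 9)        /* bits 16..31 into the field */
                             : (value << 7);       /* bits 0..15 into the field  */
  return insn | (field & mask);
}

int main (void)
{
  uint32_t fnaddr = 0x80001234u;                   /* example target address */
  uint32_t mvkl = 0x0000002bu;                     /* template: mvkl .s2 fnlow,B0  */
  uint32_t mvkh = 0x0000006bu;                     /* template: mvkh .s2 fnhigh,B0 */
  printf ("mvkl word: 0x%08x\n", (unsigned) patch_field (mvkl, fnaddr, 0));
  printf ("mvkh word: 0x%08x\n", (unsigned) patch_field (mvkh, fnaddr, 1));
  return 0;
}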
*/ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp, + vcall_offset)); + if (!memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (Pmode, REG_A1); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("mvkl .s1 %0, %1", xops); + output_asm_insn ("mvkh .s1 %0, %1", xops); + output_asm_insn ("nop 2", xops); + output_asm_insn ("add .d1 %2, %1, %2", xops); + xops[0] = gen_rtx_MEM (Pmode, a0tmp); + } + else + output_asm_insn ("nop 4", xops); + xops[2] = this_rtx; + output_asm_insn ("ldw .d1t1 %0, %1", xops); + output_asm_insn ("|| b .s2 \t%4", xops); + output_asm_insn ("nop 4", xops); + output_asm_insn ("add .d1 %2, %1, %2", xops); + } +} + +/* Return true if EXP goes in small data/bss. */ + +static bool +c6x_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never small data. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp)) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + + if (strcmp (section, ".neardata") == 0 + || strncmp (section, ".neardata.", 10) == 0 + || strncmp (section, ".gnu.linkonce.s.", 16) == 0 + || strcmp (section, ".bss") == 0 + || strncmp (section, ".bss.", 5) == 0 + || strncmp (section, ".gnu.linkonce.sb.", 17) == 0 + || strcmp (section, ".rodata") == 0 + || strncmp (section, ".rodata.", 8) == 0 + || strncmp (section, ".gnu.linkonce.s2.", 17) == 0) + return true; + } + else + return PLACE_IN_SDATA_P (exp); + + return false; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. We don't have a tree type, so we can't use the + PLACE_IN_SDATA_P macro we use everywhere else; we choose to place + everything sized 8 bytes or smaller into small data. */ + +static section * +c6x_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (c6x_sdata_mode == C6X_SDATA_ALL + || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8)) + /* ??? Consider using mergeable sdata sections. 
*/ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static section * +c6x_elf_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) +{ + const char *sname = NULL; + unsigned int flags = SECTION_WRITE; + if (c6x_in_small_data_p (decl)) + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_SRODATA: + sname = ".rodata"; + flags = 0; + break; + case SECCAT_SDATA: + sname = ".neardata"; + break; + case SECCAT_SBSS: + sname = ".bss"; + flags |= SECTION_BSS; + default: + break; + } + } + else + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + sname = ".fardata"; + break; + case SECCAT_DATA_REL: + sname = ".fardata.rel"; + break; + case SECCAT_DATA_REL_LOCAL: + sname = ".fardata.rel.local"; + break; + case SECCAT_DATA_REL_RO: + sname = ".fardata.rel.ro"; + break; + case SECCAT_DATA_REL_RO_LOCAL: + sname = ".fardata.rel.ro.local"; + break; + case SECCAT_BSS: + sname = ".far"; + flags |= SECTION_BSS; + break; + case SECCAT_RODATA: + sname = ".const"; + flags = 0; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + default: + break; + } + } + if (sname) + { + /* We might get called with string constants, but get_named_section + doesn't like them as they are not DECLs. Also, we need to set + flags in that case. */ + if (!DECL_P (decl)) + return get_section (sname, flags, NULL); + return get_named_section (decl, sname, reloc); + } + + return default_elf_select_section (decl, reloc, align); +} + +/* Build up a unique section name, expressed as a + STRING_CST node, and assign it to DECL_SECTION_NAME (decl). + RELOC indicates whether the initial value of EXP requires + link-time relocations. */ + +static void ATTRIBUTE_UNUSED +c6x_elf_unique_section (tree decl, int reloc) +{ + const char *prefix = NULL; + /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ + bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; + + if (c6x_in_small_data_p (decl)) + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_SDATA: + prefix = one_only ? ".s" : ".neardata"; + break; + case SECCAT_SBSS: + prefix = one_only ? ".sb" : ".bss"; + break; + case SECCAT_SRODATA: + prefix = one_only ? ".s2" : ".rodata"; + break; + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + case SECCAT_RODATA: + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + gcc_unreachable (); + default: + /* Everything else we place into default sections and hope for the + best. */ + break; + } + } + else + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + prefix = one_only ? ".fd" : ".fardata"; + break; + case SECCAT_BSS: + prefix = one_only ? ".fb" : ".far"; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + prefix = one_only ? 
".fr" : ".const"; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + default: + break; + } + } + + if (prefix) + { + const char *name, *linkonce; + char *string; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = targetm.strip_name_encoding (name); + + /* If we're using one_only, then there needs to be a .gnu.linkonce + prefix to the section name. */ + linkonce = one_only ? ".gnu.linkonce" : ""; + + string = ACONCAT ((linkonce, prefix, ".", name, NULL)); + + DECL_SECTION_NAME (decl) = build_string (strlen (string), string); + return; + } + default_unique_section (decl, reloc); +} + +static unsigned int +c6x_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".far") == 0 + || strncmp (name, ".far.", 5) == 0) + flags |= SECTION_BSS; + + flags |= default_section_type_flags (decl, name, reloc); + + return flags; +} + +/* Checks whether the given CALL_EXPR would use a caller saved + register. This is used to decide whether sibling call optimization + could be performed on the respective function call. */ + +static bool +c6x_call_saved_register_used (tree call_expr) +{ + CUMULATIVE_ARGS cum_v; + cumulative_args_t cum; + HARD_REG_SET call_saved_regset; + tree parameter; + enum machine_mode mode; + tree type; + rtx parm_rtx; + int i; + + INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0); + cum = pack_cumulative_args (&cum_v); + + COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set); + for (i = 0; i < call_expr_nargs (call_expr); i++) + { + parameter = CALL_EXPR_ARG (call_expr, i); + gcc_assert (parameter); + + /* For an undeclared variable passed as parameter we will get + an ERROR_MARK node here. */ + if (TREE_CODE (parameter) == ERROR_MARK) + return true; + + type = TREE_TYPE (parameter); + gcc_assert (type); + + mode = TYPE_MODE (type); + gcc_assert (mode); + + if (pass_by_reference (&cum_v, mode, type, true)) + { + mode = Pmode; + type = build_pointer_type (type); + } + + parm_rtx = c6x_function_arg (cum, mode, type, 0); + + c6x_function_arg_advance (cum, mode, type, 0); + + if (!parm_rtx) + continue; + + if (REG_P (parm_rtx) + && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx), + REGNO (parm_rtx))) + return true; + if (GET_CODE (parm_rtx) == PARALLEL) + { + int n = XVECLEN (parm_rtx, 0); + while (n-- > 0) + { + rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0); + if (REG_P (x) + && overlaps_hard_reg_set_p (call_saved_regset, + GET_MODE (x), REGNO (x))) + return true; + } + } + } + return false; +} + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +c6x_function_ok_for_sibcall (tree decl, tree exp) +{ + /* Registers A10, A12, B10 and B12 are available as arguments + register but unfortunately caller saved. This makes functions + needing these registers for arguments not suitable for + sibcalls. */ + if (c6x_call_saved_register_used (exp)) + return false; + + if (!flag_pic) + return true; + + if (TARGET_DSBT) + { + /* When compiling for DSBT, the calling function must be local, + so that when we reload B14 in the sibcall epilogue, it will + not change its value. */ + struct cgraph_local_info *this_func; + + if (!decl) + /* Not enough information. 
*/ + return false; + + this_func = cgraph_local_info (current_function_decl); + return this_func->local; + } + + return true; +} + +/* Return true if DECL is known to be linked into section SECTION. */ + +static bool +c6x_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about functions defined in the same + compilation unit. */ + if (!TREE_STATIC (decl)) + return false; + + /* Make sure that SYMBOL always binds to the definition in this + compilation unit. */ + if (!targetm.binds_local_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_ONE_ONLY (decl)) + return false; + } + + return function_section (decl) == section; +} + +/* Return true if a call to OP, which is a SYMBOL_REF, must be expanded + as a long call. */ +bool +c6x_long_call_p (rtx op) +{ + tree decl; + + if (!TARGET_LONG_CALLS) + return false; + + decl = SYMBOL_REF_DECL (op); + + /* Try to determine whether the symbol is in the same section as the current + function. Be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. */ + if (decl != NULL_TREE + && !flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && c6x_function_in_section_p (decl, current_function_section ())) + return false; + + return true; +} + +/* Emit the sequence for a call. */ +void +c6x_expand_call (rtx retval, rtx address, bool sibcall) +{ + rtx callee = XEXP (address, 0); + rtx call_insn; + + if (!c6x_call_operand (callee, Pmode)) + { + callee = force_reg (Pmode, callee); + address = change_address (address, Pmode, callee); + } + call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx); + if (sibcall) + { + call_insn = emit_call_insn (call_insn); + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), + gen_rtx_REG (Pmode, REG_B3)); + } + else + { + if (retval == NULL_RTX) + call_insn = emit_call_insn (call_insn); + else + call_insn = emit_call_insn (gen_rtx_SET (GET_MODE (retval), retval, + call_insn)); + } + if (flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx); +} + +/* Legitimize PIC addresses. If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. This is REG if nonzero, otherwise we allocate register(s) as + necessary. PICREG is the register holding the pointer to the PIC offset + table. 
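An illustration of the sibcall restriction above: once a call needs the call-saved argument registers (A10/B10/A12/B12, i.e. the 7th to 10th register arguments), c6x_call_saved_register_used reports a conflict and the tail call stays a normal call. Whether the shorter call really becomes a sibcall still depends on the remaining checks (PIC/DSBT, long calls); the functions below are invented for the sketch.

int callee_short (int a, int b, int c);
int callee_long (int a, int b, int c, int d, int e, int f, int g);

int tail_ok (int x)
{
  return callee_short (x, x, x);             /* args in A4/B4/A6: sibcall allowed
                                                by this check */
}

int tail_blocked (int x)
{
  return callee_long (x, x, x, x, x, x, x);  /* 7th argument lands in A10 */
}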
*/ + +static rtx +legitimize_pic_address (rtx orig, rtx reg, rtx picreg) +{ + rtx addr = orig; + rtx new_rtx = orig; + + if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) + { + int unspec = UNSPEC_LOAD_GOT; + rtx tmp; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + if (flag_pic == 2) + { + if (can_create_pseudo_p ()) + tmp = gen_reg_rtx (Pmode); + else + tmp = reg; + emit_insn (gen_movsi_gotoff_high (tmp, addr)); + emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr)); + emit_insn (gen_load_got_gotoff (reg, picreg, tmp)); + } + else + { + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); + } + if (picreg == pic_offset_table_rtx) + crtl->uses_pic_offset_table = 1; + return reg; + } + + else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS) + { + rtx base; + + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (XEXP (addr, 0) == picreg) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + base = legitimize_pic_address (XEXP (addr, 0), reg, picreg); + addr = legitimize_pic_address (XEXP (addr, 1), + base == reg ? NULL_RTX : reg, + picreg); + + if (GET_CODE (addr) == CONST_INT) + { + gcc_assert (! reload_in_progress && ! reload_completed); + addr = force_reg (Pmode, addr); + } + + if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0)); + addr = XEXP (addr, 1); + } + + return gen_rtx_PLUS (Pmode, base, addr); + } + + return new_rtx; +} + +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. */ + +bool +expand_move (rtx *operands, enum machine_mode mode) +{ + rtx dest = operands[0]; + rtx op = operands[1]; + + if ((reload_in_progress | reload_completed) == 0 + && GET_CODE (dest) == MEM && GET_CODE (op) != REG) + operands[1] = force_reg (mode, op); + else if (mode == SImode && symbolic_operand (op, SImode)) + { + if (flag_pic) + { + if (sdata_symbolic_operand (op, SImode)) + { + emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op)); + crtl->uses_pic_offset_table = 1; + return true; + } + else + { + rtx temp = (reload_completed || reload_in_progress + ? dest : gen_reg_rtx (Pmode)); + + operands[1] = legitimize_pic_address (op, temp, + pic_offset_table_rtx); + } + } + else if (reload_completed + && !sdata_symbolic_operand (op, SImode)) + { + emit_insn (gen_movsi_high (dest, op)); + emit_insn (gen_movsi_lo_sum (dest, dest, op)); + return true; + } + } + return false; +} + +/* This function is called when we're about to expand an integer compare + operation which performs COMPARISON. It examines the second operand, + and if it is an integer constant that cannot be used directly on the + current machine in a comparison insn, it returns true. 
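A standalone restatement of c6x_force_op_for_comparison_p (defined just below) for a constant operand v, assuming the usual C6X meanings of the constraints it tests: Iu4 an unsigned 4-bit, Is5 a signed 5-bit and Iu5 an unsigned 5-bit immediate. have_insns_64 stands in for TARGET_INSNS_64; non-constant operands never need forcing.

#include <stdbool.h>

enum cmp_code { CMP_EQ, CMP_LT, CMP_GT, CMP_LTU, CMP_GTU };

static bool force_op_to_register_p (enum cmp_code code, long long v,
                                    bool have_insns_64)
{
  if (v >= 0 && v <= 15)                              /* Iu4: always usable */
    return false;
  if ((code == CMP_EQ || code == CMP_LT || code == CMP_GT)
      && !(v >= -16 && v <= 15))                      /* needs Is5 */
    return true;
  if ((code == CMP_GTU || code == CMP_LTU)
      && (!have_insns_64 || !(v >= 0 && v <= 31)))    /* needs Iu5 and C64X+ insns */
    return true;
  return false;
}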
*/ +bool +c6x_force_op_for_comparison_p (enum rtx_code code, rtx op) +{ + if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op)) + return false; + + if ((code == EQ || code == LT || code == GT) + && !satisfies_constraint_Is5 (op)) + return true; + if ((code == GTU || code == LTU) + && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op))) + return true; + + return false; +} + +/* Emit comparison instruction if necessary, returning the expression + that holds the compare result in the proper mode. Return the comparison + that should be used in the jump insn. */ + +rtx +c6x_expand_compare (rtx comparison, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (comparison); + rtx op0 = XEXP (comparison, 0); + rtx op1 = XEXP (comparison, 1); + rtx cmp; + enum rtx_code jump_code = code; + enum machine_mode op_mode = GET_MODE (op0); + + if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx) + { + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0), + gen_highpart (SImode, op0))); + op_mode = SImode; + cmp = t; + } + else if (op_mode == DImode) + { + rtx lo[2], high[2]; + rtx cmp1, cmp2; + + if (code == NE || code == GEU || code == LEU || code == GE || code == LE) + { + code = reverse_condition (code); + jump_code = EQ; + } + else + jump_code = NE; + + split_di (&op0, 1, lo, high); + split_di (&op1, 1, lo + 1, high + 1); + + if (c6x_force_op_for_comparison_p (code, high[1]) + || c6x_force_op_for_comparison_p (EQ, high[1])) + high[1] = force_reg (SImode, high[1]); + + cmp1 = gen_reg_rtx (SImode); + cmp2 = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, cmp1, + gen_rtx_fmt_ee (code, SImode, high[0], high[1]))); + if (code == EQ) + { + if (c6x_force_op_for_comparison_p (code, lo[1])) + lo[1] = force_reg (SImode, lo[1]); + emit_insn (gen_rtx_SET (VOIDmode, cmp2, + gen_rtx_fmt_ee (code, SImode, lo[0], lo[1]))); + emit_insn (gen_andsi3 (cmp1, cmp1, cmp2)); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, cmp2, + gen_rtx_EQ (SImode, high[0], high[1]))); + if (code == GT) + code = GTU; + else if (code == LT) + code = LTU; + if (c6x_force_op_for_comparison_p (code, lo[1])) + lo[1] = force_reg (SImode, lo[1]); + emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode, + lo[0], lo[1]), + lo[0], lo[1], cmp2)); + emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2)); + } + cmp = cmp1; + } + else if (TARGET_FP && !flag_finite_math_only + && (op_mode == DFmode || op_mode == SFmode) + && code != EQ && code != NE && code != LT && code != GT + && code != UNLE && code != UNGE) + { + enum rtx_code code1, code2, code3; + rtx (*fn) (rtx, rtx, rtx, rtx, rtx); + + jump_code = NE; + code3 = UNKNOWN; + switch (code) + { + case UNLT: + case UNGT: + jump_code = EQ; + /* fall through */ + case LE: + case GE: + code1 = code == LE || code == UNGT ? LT : GT; + code2 = EQ; + break; + + case UNORDERED: + jump_code = EQ; + /* fall through */ + case ORDERED: + code3 = EQ; + /* fall through */ + case LTGT: + code1 = LT; + code2 = GT; + break; + + case UNEQ: + code1 = LT; + code2 = GT; + jump_code = EQ; + break; + + default: + gcc_unreachable (); + } + + cmp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, cmp, + gen_rtx_fmt_ee (code1, SImode, op0, op1))); + fn = op_mode == DFmode ? 
gen_cmpdf_ior : gen_cmpsf_ior; + emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1), + op0, op1, cmp)); + if (code3 != UNKNOWN) + emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1), + op0, op1, cmp)); + } + else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx) + cmp = op0; + else + { + bool is_fp_libfunc; + is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode); + + if ((code == NE || code == GEU || code == LEU || code == GE || code == LE) + && !is_fp_libfunc) + { + code = reverse_condition (code); + jump_code = EQ; + } + else if (code == UNGE) + { + code = LT; + jump_code = EQ; + } + else if (code == UNLE) + { + code = GT; + jump_code = EQ; + } + else + jump_code = NE; + + if (is_fp_libfunc) + { + rtx insns; + rtx libfunc; + switch (code) + { + case EQ: + libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc; + break; + case NE: + libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc; + break; + case GT: + libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc; + break; + case GE: + libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc; + break; + case LT: + libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc; + break; + case LE: + libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc; + break; + default: + gcc_unreachable (); + } + start_sequence (); + + cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2, + op0, op_mode, op1, op_mode); + insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, cmp, cmp, + gen_rtx_fmt_ee (code, SImode, op0, op1)); + } + else + { + cmp = gen_reg_rtx (SImode); + if (c6x_force_op_for_comparison_p (code, op1)) + op1 = force_reg (SImode, op1); + emit_insn (gen_rtx_SET (VOIDmode, cmp, + gen_rtx_fmt_ee (code, SImode, op0, op1))); + } + } + + return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx); +} + +/* Return one word of double-word value OP. HIGH_P is true to select the + high part, false to select the low part. When encountering auto-increment + addressing, we make the assumption that the low part is going to be accessed + first. */ + +rtx +c6x_subword (rtx op, bool high_p) +{ + unsigned int byte; + enum machine_mode mode; + + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = DImode; + + if (TARGET_BIG_ENDIAN ? !high_p : high_p) + byte = UNITS_PER_WORD; + else + byte = 0; + + if (MEM_P (op)) + { + rtx addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS || REG_P (addr)) + return adjust_address (op, word_mode, byte); + /* FIXME: should really support autoincrement addressing for + multi-word modes. */ + gcc_unreachable (); + } + + return simplify_gen_subreg (word_mode, op, mode, byte); +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +{ + while (num--) + { + rtx op = operands[num]; + + lo_half[num] = c6x_subword (op, false); + hi_half[num] = c6x_subword (op, true); + } +} + +/* Return true if VAL is a mask valid for a clr instruction. 
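A self-contained restatement of the mask test defined just below: a value is accepted for a clr instruction when its zero bits form a single contiguous field, i.e. the word reads 1...1 0...0 1...1 from bit 0 upward. The helper and the test values here are illustrative only.

#include <stdbool.h>
#include <stdio.h>

static bool clr_mask_ok (unsigned int val)
{
  int i = 0;
  while (i < 32 && (val & (1u << i)))    /* skip low set bits */
    i++;
  while (i < 32 && !(val & (1u << i)))   /* skip the cleared field */
    i++;
  while (i < 32 && (val & (1u << i)))    /* remaining bits must all be set */
    i++;
  return i == 32;
}

int main (void)
{
  printf ("%d\n", clr_mask_ok (0xffff00ffu));  /* 1: one contiguous zero field */
  printf ("%d\n", clr_mask_ok (0xff00ff00u));  /* 0: two separate zero runs    */
  return 0;
}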
*/ +bool +c6x_valid_mask_p (HOST_WIDE_INT val) +{ + int i; + for (i = 0; i < 32; i++) + if (!(val & ((unsigned HOST_WIDE_INT)1 << i))) + break; + for (; i < 32; i++) + if (val & ((unsigned HOST_WIDE_INT)1 << i)) + break; + for (; i < 32; i++) + if (!(val & ((unsigned HOST_WIDE_INT)1 << i))) + return false; + return true; +} + +/* Expand a block move for a movmemM pattern. */ + +bool +c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, + rtx expected_align_exp ATTRIBUTE_UNUSED, + rtx expected_size_exp ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT align = 1; + unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align; + unsigned HOST_WIDE_INT count = 0, offset = 0; + unsigned int biggest_move = TARGET_STDW ? 8 : 4; + + if (CONST_INT_P (align_exp)) + align = INTVAL (align_exp); + + src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT; + dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT; + min_mem_align = MIN (src_mem_align, dst_mem_align); + + if (min_mem_align > align) + align = min_mem_align / BITS_PER_UNIT; + if (src_mem_align < align) + src_mem_align = align; + if (dst_mem_align < align) + dst_mem_align = align; + + if (CONST_INT_P (count_exp)) + count = INTVAL (count_exp); + else + return false; + + /* Make sure we don't need to care about overflow later on. */ + if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) + return false; + + if (count >= 28 && (count & 3) == 0 && align >= 4) + { + tree dst_expr = MEM_EXPR (dst); + tree src_expr = MEM_EXPR (src); + rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc; + rtx srcreg = force_reg (Pmode, XEXP (src, 0)); + rtx dstreg = force_reg (Pmode, XEXP (dst, 0)); + + if (src_expr) + mark_addressable (src_expr); + if (dst_expr) + mark_addressable (dst_expr); + emit_library_call (fn, LCT_NORMAL, VOIDmode, 3, + dstreg, Pmode, srcreg, Pmode, count_exp, SImode); + return true; + } + + if (biggest_move > align && !TARGET_INSNS_64) + biggest_move = align; + + if (count / biggest_move > 7) + return false; + + while (count > 0) + { + rtx reg, reg_lowpart; + enum machine_mode srcmode, dstmode; + unsigned HOST_WIDE_INT src_size, dst_size, src_left; + int shift; + rtx srcmem, dstmem; + + while (biggest_move > count) + biggest_move /= 2; + + src_size = dst_size = biggest_move; + if (src_size > src_mem_align && src_size == 2) + src_size = 1; + if (dst_size > dst_mem_align && dst_size == 2) + dst_size = 1; + + if (dst_size > src_size) + dst_size = src_size; + + srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0); + dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0); + if (src_size >= 4) + reg_lowpart = reg = gen_reg_rtx (srcmode); + else + { + reg = gen_reg_rtx (SImode); + reg_lowpart = gen_lowpart (srcmode, reg); + } + + srcmem = adjust_address (copy_rtx (src), srcmode, offset); + + if (src_size > src_mem_align) + { + enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi + : CODE_FOR_movmisaligndi); + emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem)); + } + else + emit_move_insn (reg_lowpart, srcmem); + + src_left = src_size; + shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0; + while (src_left > 0) + { + rtx dstreg = reg_lowpart; + + if (src_size > dst_size) + { + rtx srcword = reg; + int shift_amount = shift & (BITS_PER_WORD - 1); + if (src_size > 4) + srcword = operand_subword_force (srcword, src_left >= 4 ? 
0 : 4, + SImode); + if (shift_amount > 0) + { + dstreg = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (dstreg, srcword, + GEN_INT (shift_amount))); + } + else + dstreg = srcword; + dstreg = gen_lowpart (dstmode, dstreg); + } + + dstmem = adjust_address (copy_rtx (dst), dstmode, offset); + if (dst_size > dst_mem_align) + { + enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi + : CODE_FOR_movmisaligndi); + emit_insn (GEN_FCN (icode) (dstmem, dstreg)); + } + else + emit_move_insn (dstmem, dstreg); + + if (TARGET_BIG_ENDIAN) + shift -= dst_size * BITS_PER_UNIT; + else + shift += dst_size * BITS_PER_UNIT; + offset += dst_size; + src_left -= dst_size; + } + count -= src_size; + } + return true; +} + +/* Subroutine of print_address_operand, print a single address offset OFF for + a memory access of mode MEM_MODE, choosing between normal form and scaled + form depending on the type of the insn. Misaligned memory references must + use the scaled form. */ + +static void +print_address_offset (FILE *file, rtx off, enum machine_mode mem_mode) +{ + rtx pat; + + if (c6x_current_insn != NULL_RTX) + { + pat = PATTERN (c6x_current_insn); + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS) + { + gcc_assert (CONST_INT_P (off) + && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0); + fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]", + INTVAL (off) / GET_MODE_SIZE (mem_mode)); + return; + } + } + fputs ("(", file); + output_address (off); + fputs (")", file); +} + +static bool +c6x_print_operand_punct_valid_p (unsigned char c) +{ + return c == '$' || c == '.' || c == '|'; +} + +static void c6x_print_operand (FILE *, rtx, int); + +/* Subroutine of c6x_print_operand; used to print a memory reference X to FILE. 
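The address printer that follows produces TI-style operand syntax. A few representative forms, sketched from the cases below; the register names are examples only.

  base + positive constant   ->  *+A4(8)     (or scaled *+A4[2] for the
                                              misaligned-access patterns)
  POST_INC auto-increment    ->  *A4++[1]
  PRE_DEC auto-decrement     ->  *--A4[1]
  small-data symbol          ->  *+B14(sym)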
*/ + +static void +c6x_print_address_operand (FILE *file, rtx x, enum machine_mode mem_mode) +{ + rtx off; + switch (GET_CODE (x)) + { + case PRE_MODIFY: + case POST_MODIFY: + if (GET_CODE (x) == POST_MODIFY) + output_address (XEXP (x, 0)); + off = XEXP (XEXP (x, 1), 1); + if (XEXP (x, 0) == stack_pointer_rtx) + { + if (GET_CODE (x) == PRE_MODIFY) + gcc_assert (INTVAL (off) > 0); + else + gcc_assert (INTVAL (off) < 0); + } + if (CONST_INT_P (off) && INTVAL (off) < 0) + { + fprintf (file, "--"); + off = GEN_INT (-INTVAL (off)); + } + else + fprintf (file, "++"); + if (GET_CODE (x) == PRE_MODIFY) + output_address (XEXP (x, 0)); + print_address_offset (file, off, mem_mode); + break; + + case PLUS: + off = XEXP (x, 1); + if (CONST_INT_P (off) && INTVAL (off) < 0) + { + fprintf (file, "-"); + off = GEN_INT (-INTVAL (off)); + } + else + fprintf (file, "+"); + output_address (XEXP (x, 0)); + print_address_offset (file, off, mem_mode); + break; + + case PRE_DEC: + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + fprintf (file, "--"); + output_address (XEXP (x, 0)); + fprintf (file, "[1]"); + break; + case PRE_INC: + fprintf (file, "++"); + output_address (XEXP (x, 0)); + fprintf (file, "[1]"); + break; + case POST_INC: + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + output_address (XEXP (x, 0)); + fprintf (file, "++[1]"); + break; + case POST_DEC: + output_address (XEXP (x, 0)); + fprintf (file, "--[1]"); + break; + + case SYMBOL_REF: + case CONST: + case LABEL_REF: + gcc_assert (sdata_symbolic_operand (x, Pmode)); + fprintf (file, "+B14("); + output_addr_const (file, x); + fprintf (file, ")"); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_LOAD_GOT: + fputs ("$GOT(", file); + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs (")", file); + break; + case UNSPEC_LOAD_SDATA: + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + default: + gcc_unreachable (); + } + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + c6x_print_operand (file, x, 0); + break; + } +} + +/* Return a single character, which is either 'l', 's', 'd' or 'm', which + specifies the functional unit used by INSN. */ + +char +c6x_get_unit_specifier (rtx insn) +{ + enum attr_units units; + + if (insn_info.exists ()) + { + int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation; + return c6x_unit_names[unit][0]; + } + + units = get_attr_units (insn); + switch (units) + { + case UNITS_D: + case UNITS_DL: + case UNITS_DS: + case UNITS_DLS: + case UNITS_D_ADDR: + return 'd'; + break; + case UNITS_L: + case UNITS_LS: + return 'l'; + break; + case UNITS_S: + return 's'; + break; + case UNITS_M: + return 'm'; + break; + default: + gcc_unreachable (); + } +} + +/* Prints the unit specifier field. */ +static void +c6x_print_unit_specifier_field (FILE *file, rtx insn) +{ + enum attr_units units = get_attr_units (insn); + enum attr_cross cross = get_attr_cross (insn); + enum attr_dest_regfile rf = get_attr_dest_regfile (insn); + int half; + char unitspec; + + if (units == UNITS_D_ADDR) + { + enum attr_addr_regfile arf = get_attr_addr_regfile (insn); + int t_half; + gcc_assert (arf != ADDR_REGFILE_UNKNOWN); + half = arf == ADDR_REGFILE_A ? 1 : 2; + t_half = rf == DEST_REGFILE_A ? 
1 : 2; + fprintf (file, ".d%dt%d", half, t_half); + return; + } + + if (insn_info.exists ()) + { + int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation; + fputs (".", file); + fputs (c6x_unit_names[unit], file); + if (cross == CROSS_Y) + fputs ("x", file); + return; + } + + gcc_assert (rf != DEST_REGFILE_UNKNOWN); + unitspec = c6x_get_unit_specifier (insn); + half = rf == DEST_REGFILE_A ? 1 : 2; + fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : ""); +} + +/* Output assembly language output for the address ADDR to FILE. */ +static void +c6x_print_operand_address (FILE *file, rtx addr) +{ + c6x_print_address_operand (file, addr, VOIDmode); +} + +/* Print an operand, X, to FILE, with an optional modifier in CODE. + + Meaning of CODE: + $ -- print the unit specifier field for the instruction. + . -- print the predicate for the instruction or an emptry string for an + unconditional one. + | -- print "||" if the insn should be issued in parallel with the previous + one. + + C -- print an opcode suffix for a reversed condition + d -- H, W or D as a suffix for ADDA, based on the factor given by the + operand + D -- print either B, H, W or D as a suffix for ADDA, based on the size of + the operand + J -- print a predicate + j -- like J, but use reverse predicate + k -- treat a CONST_INT as a register number and print it as a register + k -- like k, but print out a doubleword register + n -- print an integer operand, negated + p -- print the low part of a DImode register + P -- print the high part of a DImode register + r -- print the absolute value of an integer operand, shifted right by 1 + R -- print the absolute value of an integer operand, shifted right by 2 + f -- the first clear bit in an integer operand assumed to be a mask for + a clr instruction + F -- the last clear bit in such a mask + s -- the first set bit in an integer operand assumed to be a mask for + a set instruction + S -- the last set bit in such a mask + U -- print either 1 or 2, depending on the side of the machine used by + the operand */ + +static void +c6x_print_operand (FILE *file, rtx x, int code) +{ + int i; + HOST_WIDE_INT v; + tree t; + enum machine_mode mode; + + if (code == '|') + { + if (GET_MODE (c6x_current_insn) != TImode) + fputs ("||", file); + return; + } + if (code == '$') + { + c6x_print_unit_specifier_field (file, c6x_current_insn); + return; + } + + if (code == '.') + { + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + fputs ("[", file); + if (GET_CODE (x) == EQ) + fputs ("!", file); + fputs (reg_names [regno], file); + fputs ("]", file); + } + return; + } + + mode = GET_MODE (x); + + switch (code) + { + case 'C': + case 'c': + { + enum rtx_code c = GET_CODE (x); + if (code == 'C') + c = swap_condition (c); + fputs (GET_RTX_NAME (c), file); + } + return; + + case 'J': + case 'j': + { + unsigned int regno = REGNO (XEXP (x, 0)); + if ((GET_CODE (x) == EQ) == (code == 'J')) + fputs ("!", file); + fputs (reg_names [regno], file); + } + return; + + case 'k': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + fprintf (file, "%s", reg_names[v]); + return; + case 'K': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + gcc_assert ((v & 1) == 0); + fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]); + return; + + case 's': + case 'S': + case 'f': + case 'F': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + for (i = 0; i < 32; i++) + { + HOST_WIDE_INT tst = v & 1; + if (((code == 'f' || code == 'F') && !tst) + || 
((code == 's' || code == 'S') && tst)) + break; + v >>= 1; + } + if (code == 'f' || code == 's') + { + fprintf (file, "%d", i); + return; + } + for (;i < 32; i++) + { + HOST_WIDE_INT tst = v & 1; + if ((code == 'F' && tst) || (code == 'S' && !tst)) + break; + v >>= 1; + } + fprintf (file, "%d", i - 1); + return; + + case 'n': + gcc_assert (GET_CODE (x) == CONST_INT); + output_addr_const (file, GEN_INT (-INTVAL (x))); + return; + + case 'r': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + if (v < 0) + v = -v; + output_addr_const (file, GEN_INT (v >> 1)); + return; + + case 'R': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + if (v < 0) + v = -v; + output_addr_const (file, GEN_INT (v >> 2)); + return; + + case 'd': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file); + return; + + case 'p': + case 'P': + gcc_assert (GET_CODE (x) == REG); + v = REGNO (x); + if (code == 'P') + v++; + fputs (reg_names[v], file); + return; + + case 'D': + v = 0; + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); + v = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + + } + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + t = SYMBOL_REF_DECL (x); + if (DECL_P (t)) + v |= DECL_ALIGN_UNIT (t); + else + v |= TYPE_ALIGN_UNIT (TREE_TYPE (t)); + if (v & 1) + fputs ("b", file); + else if (v & 2) + fputs ("h", file); + else + fputs ("w", file); + return; + + case 'U': + if (MEM_P (x)) + { + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC) + x = XEXP (x, 0); + if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) + { + gcc_assert (sdata_symbolic_operand (x, Pmode)); + fputs ("2", file); + return; + } + } + gcc_assert (REG_P (x)); + if (A_REGNO_P (REGNO (x))) + fputs ("1", file); + if (B_REGNO_P (REGNO (x))) + fputs ("2", file); + return; + + default: + switch (GET_CODE (x)) + { + case REG: + if (GET_MODE_SIZE (mode) == 8) + fprintf (file, "%s:%s", reg_names[REGNO (x) + 1], + reg_names[REGNO (x)]); + else + fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: + fputc ('*', file); + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x)); + break; + + case SYMBOL_REF: + fputc ('(', file); + output_addr_const (file, x); + fputc (')', file); + break; + + case CONST_INT: + output_addr_const (file, x); + break; + + case CONST_DOUBLE: + output_operand_lossage ("invalid const_double operand"); + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Return TRUE if OP is a valid memory address with a base register of + class C. If SMALL_OFFSET is true, we disallow memory references which would + require a long offset with B14/B15. 
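The 's'/'S' modifiers above (and 'f'/'F', with the roles of set and clear bits swapped) locate the bounds of a contiguous bit run so that SET/CLR can be given start and end bit positions. A minimal standalone sketch of the same linear scan, assuming the mask really is a single contiguous run; the helper name mask_bounds is invented for illustration and is not part of the GCC sources in this patch:

  #include <stdint.h>
  #include <stdio.h>

  /* Find the first and last set-bit positions of a mask that consists of
     one contiguous run of set bits, the way the 's'/'S' modifiers do.  */
  static void
  mask_bounds (uint32_t v, int *first, int *last)
  {
    int i;

    /* Scan for the first set bit (what 's' prints).  */
    for (i = 0; i < 32 && (v & 1) == 0; i++)
      v >>= 1;
    *first = i;

    /* Continue until the run of set bits ends ('S' prints i - 1).  */
    for (; i < 32 && (v & 1) != 0; i++)
      v >>= 1;
    *last = i - 1;
  }

  int
  main (void)
  {
    int lo, hi;
    mask_bounds (0x00ff0000, &lo, &hi);
    printf ("first %d last %d\n", lo, hi);  /* prints "first 16 last 23" */
    return 0;
  }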
*/ + +bool +c6x_mem_operand (rtx op, enum reg_class c, bool small_offset) +{ + enum machine_mode mode = GET_MODE (op); + rtx base = XEXP (op, 0); + switch (GET_CODE (base)) + { + case REG: + break; + case PLUS: + if (small_offset + && (XEXP (base, 0) == stack_pointer_rtx + || XEXP (base, 0) == pic_offset_table_rtx)) + { + if (!c6x_legitimate_address_p_1 (mode, base, true, true)) + return false; + } + + /* fall through */ + case PRE_INC: + case PRE_DEC: + case PRE_MODIFY: + case POST_INC: + case POST_DEC: + case POST_MODIFY: + base = XEXP (base, 0); + break; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + gcc_assert (sdata_symbolic_operand (base, Pmode)); + return !small_offset && c == B_REGS; + + default: + return false; + } + return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base)); +} + +/* Returns true if X is a valid address for use in a memory reference + of mode MODE. If STRICT is true, we do not allow pseudo registers + in the address. NO_LARGE_OFFSET is true if we are examining an + address for use in a load or store misaligned instruction, or + recursively examining an operand inside a PRE/POST_MODIFY. */ + +bool +c6x_legitimate_address_p_1 (enum machine_mode mode, rtx x, bool strict, + bool no_large_offset) +{ + int size, size1; + HOST_WIDE_INT off; + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case PRE_MODIFY: + case POST_MODIFY: + /* We can't split these into word-sized pieces yet. */ + if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + if (GET_CODE (XEXP (x, 1)) != PLUS) + return false; + if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true)) + return false; + if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0))) + return false; + + /* fall through */ + case PRE_INC: + case PRE_DEC: + case POST_INC: + case POST_DEC: + /* We can't split these into word-sized pieces yet. */ + if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + x = XEXP (x, 0); + if (!REG_P (x)) + return false; + + /* fall through */ + case REG: + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x)); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x)); + + case PLUS: + if (!REG_P (XEXP (x, 0)) + || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false)) + return false; + /* We cannot ensure currently that both registers end up in the + same register file. */ + if (REG_P (XEXP (x, 1))) + return false; + + if (mode == BLKmode) + size = 4; + else if (mode == VOIDmode) + /* ??? This can happen during ivopts. */ + size = 1; + else + size = GET_MODE_SIZE (mode); + + if (flag_pic + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA + && XEXP (x, 0) == pic_offset_table_rtx + && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode)) + return !no_large_offset && size <= 4; + if (flag_pic == 1 + && mode == Pmode + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT + && XEXP (x, 0) == pic_offset_table_rtx + && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF + || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF)) + return !no_large_offset; + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + + off = INTVAL (XEXP (x, 1)); + + /* If the machine does not have doubleword load/stores, we'll use + word size accesses. */ + size1 = size; + if (size == 2 * UNITS_PER_WORD && !TARGET_STDW) + size = UNITS_PER_WORD; + + if (((HOST_WIDE_INT)size1 - 1) & off) + return false; + off /= size; + if (off > -32 && off < (size1 == size ? 
32 : 28)) + return true; + if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx + || size1 > UNITS_PER_WORD) + return false; + return off >= 0 && off < 32768; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return (!no_large_offset + /* With -fpic, we must wrap it in an unspec to show the B14 + dependency. */ + && !flag_pic + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && sdata_symbolic_operand (x, Pmode)); + + default: + return false; + } +} + +static bool +c6x_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + return c6x_legitimate_address_p_1 (mode, x, strict, false); +} + +static bool +c6x_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + return true; +} + +/* Implements TARGET_PREFERRED_RENAME_CLASS. */ +static reg_class_t +c6x_preferred_rename_class (reg_class_t cl) +{ + if (cl == A_REGS) + return NONPREDICATE_A_REGS; + if (cl == B_REGS) + return NONPREDICATE_B_REGS; + if (cl == ALL_REGS || cl == GENERAL_REGS) + return NONPREDICATE_REGS; + return NO_REGS; +} + +/* Implements FINAL_PRESCAN_INSN. */ +void +c6x_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + c6x_current_insn = insn; +} + +/* A structure to describe the stack layout of a function. The layout is + as follows: + + [saved frame pointer (or possibly padding0)] + --> incoming stack pointer, new hard frame pointer + [saved call-used regs] + [optional padding1] + --> soft frame pointer + [frame] + [outgoing arguments] + [optional padding2] + + The structure members are laid out in this order. */ + +struct c6x_frame +{ + int padding0; + /* Number of registers to save. */ + int nregs; + int padding1; + HOST_WIDE_INT frame; + int outgoing_arguments_size; + int padding2; + + HOST_WIDE_INT to_allocate; + /* The offsets relative to the incoming stack pointer (which + becomes HARD_FRAME_POINTER). */ + HOST_WIDE_INT frame_pointer_offset; + HOST_WIDE_INT b3_offset; + + /* True if we should call push_rts/pop_rts to save and restore + registers. */ + bool push_rts; +}; + +/* Return true if we need to save and modify the PIC register in the + prologue. */ + +static bool +must_reload_pic_reg_p (void) +{ + struct cgraph_local_info *i = NULL; + + if (!TARGET_DSBT) + return false; + + i = cgraph_local_info (current_function_decl); + + if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local) + return true; + return false; +} + +/* Return 1 if we need to save REGNO. */ +static int +c6x_save_reg (unsigned int regno) +{ + return ((df_regs_ever_live_p (regno) + && !call_used_regs[regno] + && !fixed_regs[regno]) + || (regno == RETURN_ADDR_REGNO + && (df_regs_ever_live_p (regno) + || !crtl->is_leaf)) + || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ())); +} + +/* Examine the number of regs NREGS we've determined we must save. + Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for + prologue and epilogue. */ + +static bool +use_push_rts_p (int nregs) +{ + if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun) + && !cfun->machine->contains_sibcall + && !cfun->returns_struct + && !TARGET_LONG_CALLS + && nregs >= 6 && !frame_pointer_needed) + return true; + return false; +} + +/* Return number of saved general prupose registers. */ + +int +c6x_nsaved_regs (void) +{ + int nregs = 0; + int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (c6x_save_reg (regno)) + nregs++; + return nregs; +} + +/* The safe debug order mandated by the ABI. 
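The register-plus-constant case above boils down to an alignment check followed by a range check on the scaled offset. A simplified standalone sketch of just that rule, assuming the access size is a power of two and ignoring the !TARGET_STDW doubleword splitting and the B14/B15 long-offset form; reg_plus_const_ok is an invented name, not from the GCC sources:

  #include <stdbool.h>

  typedef long long hwi;   /* stand-in for HOST_WIDE_INT */

  static bool
  reg_plus_const_ok (hwi off, hwi size)
  {
    if (((size - 1) & off) != 0)    /* offset must be naturally aligned */
      return false;
    off /= size;                    /* the encoding scales by the access size */
    return off > -32 && off < 32;   /* scaled offset must fit the 5-bit field,
                                       in either direction */
  }

  /* Example: a 4-byte access at offset 124 scales to index 31 and is
     accepted; offset 128 scales to 32 and is rejected here.  */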
*/ +static unsigned reg_save_order[] = +{ + REG_A10, REG_A11, REG_A12, REG_A13, + REG_A14, REG_B3, + REG_B10, REG_B11, REG_B12, REG_B13, + REG_B14, REG_A15 +}; + +#define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order) + +/* Compute the layout of the stack frame and store it in FRAME. */ + +static void +c6x_compute_frame_layout (struct c6x_frame *frame) +{ + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT offset; + int nregs; + + /* We use the four bytes which are technically inside the caller's frame, + usually to save the frame pointer. */ + offset = -4; + frame->padding0 = 0; + nregs = c6x_nsaved_regs (); + frame->push_rts = false; + frame->b3_offset = 0; + if (use_push_rts_p (nregs)) + { + frame->push_rts = true; + frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4; + nregs = 14; + } + else if (c6x_save_reg (REG_B3)) + { + int idx; + for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--) + { + if (c6x_save_reg (reg_save_order[idx])) + frame->b3_offset -= 4; + } + } + frame->nregs = nregs; + + if (size == 0 && nregs == 0) + { + frame->padding0 = 4; + frame->padding1 = frame->padding2 = 0; + frame->frame_pointer_offset = frame->to_allocate = 0; + frame->outgoing_arguments_size = 0; + return; + } + + if (!frame->push_rts) + offset += frame->nregs * 4; + + if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0 + && !crtl->is_leaf) + /* Don't use the bottom of the caller's frame if we have no + allocation of our own and call other functions. */ + frame->padding0 = frame->padding1 = 4; + else if (offset & 4) + frame->padding1 = 4; + else + frame->padding1 = 0; + + offset += frame->padding0 + frame->padding1; + frame->frame_pointer_offset = offset; + offset += size; + + frame->outgoing_arguments_size = crtl->outgoing_args_size; + offset += frame->outgoing_arguments_size; + + if ((offset & 4) == 0) + frame->padding2 = 8; + else + frame->padding2 = 4; + frame->to_allocate = offset + frame->padding2; +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +c6x_initial_elimination_offset (int from, int to) +{ + struct c6x_frame frame; + c6x_compute_frame_layout (&frame); + + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return 0; + else if (from == FRAME_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + return -frame.frame_pointer_offset; + else + { + gcc_assert (to == STACK_POINTER_REGNUM); + + if (from == ARG_POINTER_REGNUM) + return frame.to_allocate + (frame.push_rts ? 56 : 0); + + gcc_assert (from == FRAME_POINTER_REGNUM); + return frame.to_allocate - frame.frame_pointer_offset; + } +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. */ + +static bool +c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + if (to == STACK_POINTER_REGNUM) + return !frame_pointer_needed; + return true; +} + +/* Emit insns to increment the stack pointer by OFFSET. If + FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns. + Does nothing if the offset is zero. 
*/ + +static void +emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p) +{ + rtx to_add = GEN_INT (offset); + rtx orig_to_add = to_add; + rtx insn; + + if (offset == 0) + return; + + if (offset < -32768 || offset > 32767) + { + rtx reg = gen_rtx_REG (SImode, REG_A0); + rtx low = GEN_INT (trunc_int_for_mode (offset, HImode)); + + insn = emit_insn (gen_movsi_high (reg, low)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + to_add = reg; + } + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + to_add)); + if (frame_related_p) + { + if (REG_P (to_add)) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + orig_to_add))); + + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Prologue and epilogue. */ +void +c6x_expand_prologue (void) +{ + struct c6x_frame frame; + rtx insn, mem; + int nsaved = 0; + HOST_WIDE_INT initial_offset, off, added_already; + + c6x_compute_frame_layout (&frame); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame.to_allocate; + + initial_offset = -frame.to_allocate; + if (frame.push_rts) + { + emit_insn (gen_push_rts ()); + nsaved = frame.nregs; + } + + /* If the offsets would be too large for the memory references we will + create to save registers, do the stack allocation in two parts. + Ensure by subtracting 8 that we don't store to the word pointed to + by the stack pointer. */ + if (initial_offset < -32768) + initial_offset = -frame.frame_pointer_offset - 8; + + if (frame.to_allocate > 0) + gcc_assert (initial_offset != 0); + + off = -initial_offset + 4 - frame.padding0; + + mem = gen_frame_mem (Pmode, stack_pointer_rtx); + + added_already = 0; + if (frame_pointer_needed) + { + rtx fp_reg = gen_rtx_REG (SImode, REG_A15); + /* We go through some contortions here to both follow the ABI's + recommendation that FP == incoming SP, and to avoid writing or + reading the word pointed to by the stack pointer. */ + rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-8))); + insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg); + RTX_FRAME_RELATED_P (insn) = 1; + nsaved++; + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT (8))); + RTX_FRAME_RELATED_P (insn) = 1; + off -= 4; + added_already = -8; + } + + emit_add_sp_const (initial_offset - added_already, true); + + if (nsaved < frame.nregs) + { + unsigned i; + + for (i = 0; i < N_SAVE_ORDER; i++) + { + int idx = N_SAVE_ORDER - i - 1; + unsigned regno = reg_save_order[idx]; + rtx reg; + enum machine_mode save_mode = SImode; + + if (regno == REG_A15 && frame_pointer_needed) + /* Already saved. 
*/ + continue; + if (!c6x_save_reg (regno)) + continue; + + if (TARGET_STDW && (off & 4) == 0 && off <= 256 + && (regno & 1) == 1 + && i + 1 < N_SAVE_ORDER + && reg_save_order[idx - 1] == regno - 1 + && c6x_save_reg (regno - 1)) + { + save_mode = DImode; + regno--; + i++; + } + reg = gen_rtx_REG (save_mode, regno); + off -= GET_MODE_SIZE (save_mode); + + insn = emit_move_insn (adjust_address (mem, save_mode, off), + reg); + RTX_FRAME_RELATED_P (insn) = 1; + + nsaved += HARD_REGNO_NREGS (regno, save_mode); + } + } + gcc_assert (nsaved == frame.nregs); + emit_add_sp_const (-frame.to_allocate - initial_offset, true); + if (must_reload_pic_reg_p ()) + { + if (dsbt_decl == NULL) + { + tree t; + + t = build_index_type (integer_one_node); + t = build_array_type (integer_type_node, t); + t = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__c6xabi_DSBT_BASE"), t); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_EXTERNAL (t) = 1; + TREE_STATIC (t) = 1; + TREE_PUBLIC (t) = 1; + TREE_USED (t) = 1; + + dsbt_decl = t; + } + emit_insn (gen_setup_dsbt (pic_offset_table_rtx, + XEXP (DECL_RTL (dsbt_decl), 0))); + } +} + +void +c6x_expand_epilogue (bool sibcall) +{ + unsigned i; + struct c6x_frame frame; + rtx mem; + HOST_WIDE_INT off; + int nsaved = 0; + + c6x_compute_frame_layout (&frame); + + mem = gen_frame_mem (Pmode, stack_pointer_rtx); + + /* Insert a dummy set/use of the stack pointer. This creates a + scheduler barrier between the prologue saves and epilogue restores. */ + emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx)); + + /* If the offsets would be too large for the memory references we will + create to restore registers, do a preliminary stack adjustment here. */ + off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1; + if (frame.push_rts) + { + nsaved = frame.nregs; + } + else + { + if (frame.to_allocate > 32768) + { + /* Don't add the entire offset so that we leave an unused word + above the stack pointer. */ + emit_add_sp_const ((off - 16) & ~7, false); + off &= 7; + off += 16; + } + for (i = 0; i < N_SAVE_ORDER; i++) + { + unsigned regno = reg_save_order[i]; + rtx reg; + enum machine_mode save_mode = SImode; + + if (!c6x_save_reg (regno)) + continue; + if (regno == REG_A15 && frame_pointer_needed) + continue; + + if (TARGET_STDW && (off & 4) == 0 && off < 256 + && (regno & 1) == 0 + && i + 1 < N_SAVE_ORDER + && reg_save_order[i + 1] == regno + 1 + && c6x_save_reg (regno + 1)) + { + save_mode = DImode; + i++; + } + reg = gen_rtx_REG (save_mode, regno); + + emit_move_insn (reg, adjust_address (mem, save_mode, off)); + + off += GET_MODE_SIZE (save_mode); + nsaved += HARD_REGNO_NREGS (regno, save_mode); + } + } + if (!frame_pointer_needed) + emit_add_sp_const (off + frame.padding0 - 4, false); + else + { + rtx fp_reg = gen_rtx_REG (SImode, REG_A15); + rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (8))); + emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx, + GEN_INT (-8))); + emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr)); + nsaved++; + } + gcc_assert (nsaved == frame.nregs); + if (!sibcall) + { + if (frame.push_rts) + emit_jump_insn (gen_pop_rts ()); + else + emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode, + RETURN_ADDR_REGNO))); + } +} + +/* Return the value of the return address for the frame COUNT steps up + from the current frame, after the prologue. + We punt for everything but the current frame by returning const0_rtx. 
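The prologue and epilogue loops above pair an even/odd register couple into one doubleword slot when STDW/LDDW are available, both registers need saving, and the slot is 8-byte aligned. A simplified standalone sketch of the restore side only, ignoring the offset-range limits and the reversed walk used by the prologue; plan_restores is an invented name used purely for illustration:

  #include <stdbool.h>
  #include <stdio.h>

  /* Walk a register save order and group an even register with the odd
     register that follows it into one doubleword slot when both are saved
     and the running offset is 8-byte aligned.  */
  static void
  plan_restores (const int *order, int n, const bool *saved)
  {
    int i, off = 0;

    for (i = 0; i < n; i++)
      {
        int regno = order[i];
        if (!saved[regno])
          continue;
        if ((off & 7) == 0 && (regno & 1) == 0
            && i + 1 < n && order[i + 1] == regno + 1 && saved[regno + 1])
          {
            printf ("LDDW  pair %d:%d at offset %d\n", regno + 1, regno, off);
            off += 8;
            i++;          /* the odd partner shares this slot */
          }
        else
          {
            printf ("LDW   reg %d at offset %d\n", regno, off);
            off += 4;
          }
      }
  }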
*/ + +rtx +c6x_return_addr_rtx (int count) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO); +} + +/* Return true iff TYPE is one of the shadow types. */ +static bool +shadow_type_p (enum attr_type type) +{ + return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW + || type == TYPE_MULT_SHADOW); +} + +/* Return true iff INSN is a shadow pattern. */ +static bool +shadow_p (rtx insn) +{ + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return false; + return shadow_type_p (get_attr_type (insn)); +} + +/* Return true iff INSN is a shadow or blockage pattern. */ +static bool +shadow_or_blockage_p (rtx insn) +{ + enum attr_type type; + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return false; + type = get_attr_type (insn); + return shadow_type_p (type) || type == TYPE_BLOCKAGE; +} + +/* Translate UNITS into a bitmask of units we can reserve for this + insn. */ +static int +get_reservation_flags (enum attr_units units) +{ + switch (units) + { + case UNITS_D: + case UNITS_D_ADDR: + return RESERVATION_FLAG_D; + case UNITS_L: + return RESERVATION_FLAG_L; + case UNITS_S: + return RESERVATION_FLAG_S; + case UNITS_M: + return RESERVATION_FLAG_M; + case UNITS_LS: + return RESERVATION_FLAG_LS; + case UNITS_DL: + return RESERVATION_FLAG_DL; + case UNITS_DS: + return RESERVATION_FLAG_DS; + case UNITS_DLS: + return RESERVATION_FLAG_DLS; + default: + return 0; + } +} + +/* Compute the side of the machine used by INSN, which reserves UNITS. + This must match the reservations in the scheduling description. */ +static int +get_insn_side (rtx insn, enum attr_units units) +{ + if (units == UNITS_D_ADDR) + return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1); + else + { + enum attr_dest_regfile rf = get_attr_dest_regfile (insn); + if (rf == DEST_REGFILE_ANY) + return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1; + else + return rf == DEST_REGFILE_A ? 0 : 1; + } +} + +/* After scheduling, walk the insns between HEAD and END and assign unit + reservations. */ +static void +assign_reservations (rtx head, rtx end) +{ + rtx insn; + for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn)) + { + unsigned int sched_mask, reserved; + rtx within, last; + int pass; + int rsrv[2]; + int rsrv_count[2][4]; + int i; + + if (GET_MODE (insn) != TImode) + continue; + + reserved = 0; + last = NULL_RTX; + /* Find the last insn in the packet. It has a state recorded for it, + which we can use to determine the units we should be using. */ + for (within = insn; + (within != NEXT_INSN (end) + && (within == insn || GET_MODE (within) != TImode)); + within = NEXT_INSN (within)) + { + int icode; + if (!NONDEBUG_INSN_P (within)) + continue; + icode = recog_memoized (within); + if (icode < 0) + continue; + if (shadow_p (within)) + continue; + if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0) + reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation; + last = within; + } + if (last == NULL_RTX) + continue; + + sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask; + sched_mask &= ~reserved; + + memset (rsrv_count, 0, sizeof rsrv_count); + rsrv[0] = rsrv[1] = ~0; + for (i = 0; i < 8; i++) + { + int side = i / 4; + int unit = i & 3; + unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET); + /* Clear the bits which we expect to reserve in the following loop, + leaving the ones set which aren't present in the scheduler's + state and shouldn't be reserved. 
*/ + if (sched_mask & unit_bit) + rsrv[i / 4] &= ~(1 << unit); + } + + /* Walk through the insns that occur in the same cycle. We use multiple + passes to assign units, assigning for insns with the most specific + requirements first. */ + for (pass = 0; pass < 4; pass++) + for (within = insn; + (within != NEXT_INSN (end) + && (within == insn || GET_MODE (within) != TImode)); + within = NEXT_INSN (within)) + { + int uid = INSN_UID (within); + int this_rsrv, side; + int icode; + enum attr_units units; + enum attr_type type; + int j; + + if (!NONDEBUG_INSN_P (within)) + continue; + icode = recog_memoized (within); + if (icode < 0) + continue; + if (INSN_INFO_ENTRY (uid).reservation != 0) + continue; + units = get_attr_units (within); + type = get_attr_type (within); + this_rsrv = get_reservation_flags (units); + if (this_rsrv == 0) + continue; + side = get_insn_side (within, units); + + /* Certain floating point instructions are treated specially. If + an insn can choose between units it can reserve, and its + reservation spans more than one cycle, the reservation contains + special markers in the first cycle to help us reconstruct what + the automaton chose. */ + if ((type == TYPE_ADDDP || type == TYPE_FP4) + && units == UNITS_LS) + { + int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1) + + side * UNIT_QID_SIDE_OFFSET); + int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1) + + side * UNIT_QID_SIDE_OFFSET); + if ((sched_mask & (1 << test1_code)) != 0) + { + this_rsrv = RESERVATION_FLAG_L; + sched_mask &= ~(1 << test1_code); + } + else if ((sched_mask & (1 << test2_code)) != 0) + { + this_rsrv = RESERVATION_FLAG_S; + sched_mask &= ~(1 << test2_code); + } + } + + if ((this_rsrv & (this_rsrv - 1)) == 0) + { + int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET; + rsrv[side] |= this_rsrv; + INSN_INFO_ENTRY (uid).reservation = t; + continue; + } + + if (pass == 1) + { + for (j = 0; j < 4; j++) + if (this_rsrv & (1 << j)) + rsrv_count[side][j]++; + continue; + } + if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS) + || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS)) + { + int best = -1, best_cost = INT_MAX; + for (j = 0; j < 4; j++) + if ((this_rsrv & (1 << j)) + && !(rsrv[side] & (1 << j)) + && rsrv_count[side][j] < best_cost) + { + best_cost = rsrv_count[side][j]; + best = j; + } + gcc_assert (best != -1); + rsrv[side] |= 1 << best; + for (j = 0; j < 4; j++) + if ((this_rsrv & (1 << j)) && j != best) + rsrv_count[side][j]--; + + INSN_INFO_ENTRY (uid).reservation + = best + side * UNIT_QID_SIDE_OFFSET; + } + } + } +} + +/* Return a factor by which to weight unit imbalances for a reservation + R. */ +static int +unit_req_factor (enum unitreqs r) +{ + switch (r) + { + case UNIT_REQ_D: + case UNIT_REQ_L: + case UNIT_REQ_S: + case UNIT_REQ_M: + case UNIT_REQ_X: + case UNIT_REQ_T: + return 1; + case UNIT_REQ_DL: + case UNIT_REQ_LS: + case UNIT_REQ_DS: + return 2; + case UNIT_REQ_DLS: + return 3; + default: + gcc_unreachable (); + } +} + +/* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit + requirements. Returns zero if INSN can't be handled, otherwise + either one or two to show how many of the two pairs are in use. + REQ1 is always used, it holds what is normally thought of as the + instructions reservation, e.g. UNIT_REQ_DL. REQ2 is used to either + describe a cross path, or for loads/stores, the T unit. 
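The later passes of assign_reservations make a greedy choice for insns that may run on several units: take a unit that is still free this cycle and has the fewest other contenders. A standalone sketch of that inner choice, omitting the multi-pass ordering and the ADDDP/FP4 special cases; choose_unit is an invented name, not part of the patch:

  #include <limits.h>

  /* ALLOWED and RESERVED are 4-bit masks over the D/L/S/M units of one side;
     DEMAND counts how many other insns still want each unit.  Return the
     cheapest free unit, or -1 if none is free (the original code asserts
     that a free unit is always found).  */
  static int
  choose_unit (unsigned allowed, unsigned reserved, const int demand[4])
  {
    int u, best = -1, best_cost = INT_MAX;

    for (u = 0; u < 4; u++)
      if ((allowed & (1u << u))
          && !(reserved & (1u << u))
          && demand[u] < best_cost)
        {
          best_cost = demand[u];
          best = u;
        }
    return best;
  }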
*/ +static int +get_unit_reqs (rtx insn, int *req1, int *side1, int *req2, int *side2) +{ + enum attr_units units; + enum attr_cross cross; + int side, req; + + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return 0; + units = get_attr_units (insn); + if (units == UNITS_UNKNOWN) + return 0; + side = get_insn_side (insn, units); + cross = get_attr_cross (insn); + + req = (units == UNITS_D ? UNIT_REQ_D + : units == UNITS_D_ADDR ? UNIT_REQ_D + : units == UNITS_DL ? UNIT_REQ_DL + : units == UNITS_DS ? UNIT_REQ_DS + : units == UNITS_L ? UNIT_REQ_L + : units == UNITS_LS ? UNIT_REQ_LS + : units == UNITS_S ? UNIT_REQ_S + : units == UNITS_M ? UNIT_REQ_M + : units == UNITS_DLS ? UNIT_REQ_DLS + : -1); + gcc_assert (req != -1); + *req1 = req; + *side1 = side; + if (units == UNITS_D_ADDR) + { + *req2 = UNIT_REQ_T; + *side2 = side ^ (cross == CROSS_Y ? 1 : 0); + return 2; + } + else if (cross == CROSS_Y) + { + *req2 = UNIT_REQ_X; + *side2 = side; + return 2; + } + return 1; +} + +/* Walk the insns between and including HEAD and TAIL, and mark the + resource requirements in the unit_reqs table. */ +static void +count_unit_reqs (unit_req_table reqs, rtx head, rtx tail) +{ + rtx insn; + + memset (reqs, 0, sizeof (unit_req_table)); + + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { + int side1, side2, req1, req2; + + switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2)) + { + case 2: + reqs[side2][req2]++; + /* fall through */ + case 1: + reqs[side1][req1]++; + break; + } + } +} + +/* Update the table REQS by merging more specific unit reservations into + more general ones, i.e. counting (for example) UNIT_REQ_D also in + UNIT_REQ_DL, DS, and DLS. */ +static void +merge_unit_reqs (unit_req_table reqs) +{ + int side; + for (side = 0; side < 2; side++) + { + int d = reqs[side][UNIT_REQ_D]; + int l = reqs[side][UNIT_REQ_L]; + int s = reqs[side][UNIT_REQ_S]; + int dl = reqs[side][UNIT_REQ_DL]; + int ls = reqs[side][UNIT_REQ_LS]; + int ds = reqs[side][UNIT_REQ_DS]; + + reqs[side][UNIT_REQ_DL] += d; + reqs[side][UNIT_REQ_DL] += l; + reqs[side][UNIT_REQ_DS] += d; + reqs[side][UNIT_REQ_DS] += s; + reqs[side][UNIT_REQ_LS] += l; + reqs[side][UNIT_REQ_LS] += s; + reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s; + } +} + +/* Examine the table REQS and return a measure of unit imbalance by comparing + the two sides of the machine. If, for example, D1 is used twice and D2 + used not at all, the return value should be 1 in the absence of other + imbalances. */ +static int +unit_req_imbalance (unit_req_table reqs) +{ + int val = 0; + int i; + + for (i = 0; i < UNIT_REQ_MAX; i++) + { + int factor = unit_req_factor ((enum unitreqs) i); + int diff = abs (reqs[0][i] - reqs[1][i]); + val += (diff + factor - 1) / factor / 2; + } + return val; +} + +/* Return the resource-constrained minimum iteration interval given the + data in the REQS table. This must have been processed with + merge_unit_reqs already. */ +static int +res_mii (unit_req_table reqs) +{ + int side, req; + int worst = 1; + for (side = 0; side < 2; side++) + for (req = 0; req < UNIT_REQ_MAX; req++) + { + int factor = unit_req_factor ((enum unitreqs) req); + worst = MAX ((reqs[side][UNIT_REQ_D] + factor - 1) / factor, worst); + } + + return worst; +} + +/* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent + the operands that are involved in the (up to) two reservations, as + found by get_unit_reqs. Return true if we did this successfully, false + if we couldn't identify what to do with INSN. 
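unit_req_imbalance above weights each side-to-side difference by how many units can satisfy the request, then halves it, so two extra uses of D on one side (with no other imbalance) score exactly 1. The same measure restated as a standalone sketch over a plain 2 x N table; the enum and helper names are invented for illustration:

  enum req { REQ_D, REQ_L, REQ_S, REQ_M, REQ_X, REQ_T,
             REQ_DL, REQ_DS, REQ_LS, REQ_DLS, REQ_MAX };

  /* How many units can satisfy a request of kind R.  */
  static int
  req_factor (enum req r)
  {
    if (r == REQ_DLS)
      return 3;
    if (r == REQ_DL || r == REQ_DS || r == REQ_LS)
      return 2;
    return 1;
  }

  static int
  imbalance (const int reqs[2][REQ_MAX])
  {
    int i, val = 0;

    for (i = 0; i < REQ_MAX; i++)
      {
        int factor = req_factor ((enum req) i);
        int diff = (reqs[0][i] > reqs[1][i]
                    ? reqs[0][i] - reqs[1][i]
                    : reqs[1][i] - reqs[0][i]);
        val += (diff + factor - 1) / factor / 2;
      }
    return val;   /* e.g. reqs[0][REQ_D] == 2, all else equal -> 1 */
  }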
*/ +static bool +get_unit_operand_masks (rtx insn, unsigned int *pmask1, unsigned int *pmask2) +{ + enum attr_op_pattern op_pat; + + if (recog_memoized (insn) < 0) + return 0; + if (GET_CODE (PATTERN (insn)) == COND_EXEC) + return false; + extract_insn (insn); + op_pat = get_attr_op_pattern (insn); + if (op_pat == OP_PATTERN_DT) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 0; + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_TD) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 1; + *pmask2 = 1 << 0; + return true; + } + else if (op_pat == OP_PATTERN_SXS) + { + gcc_assert (recog_data.n_operands == 3); + *pmask1 = (1 << 0) | (1 << 2); + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_SX) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 0; + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_SSX) + { + gcc_assert (recog_data.n_operands == 3); + *pmask1 = (1 << 0) | (1 << 1); + *pmask2 = 1 << 2; + return true; + } + return false; +} + +/* Try to replace a register in INSN, which has corresponding rename info + from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information + about the operands that must be renamed and the side they are on. + REQS is the table of unit reservations in the loop between HEAD and TAIL. + We recompute this information locally after our transformation, and keep + it only if we managed to improve the balance. */ +static void +try_rename_operands (rtx head, rtx tail, unit_req_table reqs, rtx insn, + insn_rr_info *info, unsigned int op_mask, int orig_side) +{ + enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS; + HARD_REG_SET unavailable; + du_head_p this_head; + struct du_chain *chain; + int i; + unsigned tmp_mask; + int best_reg, old_reg; + vec involved_chains = vNULL; + unit_req_table new_reqs; + + for (i = 0, tmp_mask = op_mask; tmp_mask; i++) + { + du_head_p op_chain; + if ((tmp_mask & (1 << i)) == 0) + continue; + if (info->op_info[i].n_chains != 1) + goto out_fail; + op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id); + involved_chains.safe_push (op_chain); + tmp_mask &= ~(1 << i); + } + + if (involved_chains.length () > 1) + goto out_fail; + + this_head = involved_chains[0]; + if (this_head->cannot_rename) + goto out_fail; + + for (chain = this_head->first; chain; chain = chain->next_use) + { + unsigned int mask1, mask2, mask_changed; + int count, side1, side2, req1, req2; + insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)]; + + count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2); + + if (count == 0) + goto out_fail; + + if (!get_unit_operand_masks (chain->insn, &mask1, &mask2)) + goto out_fail; + + extract_insn (chain->insn); + + mask_changed = 0; + for (i = 0; i < recog_data.n_operands; i++) + { + int j; + int n_this_op = this_rr->op_info[i].n_chains; + for (j = 0; j < n_this_op; j++) + { + du_head_p other = this_rr->op_info[i].heads[j]; + if (regrename_chain_from_id (other->id) == this_head) + break; + } + if (j == n_this_op) + continue; + + if (n_this_op != 1) + goto out_fail; + mask_changed |= 1 << i; + } + gcc_assert (mask_changed != 0); + if (mask_changed != mask1 && mask_changed != mask2) + goto out_fail; + } + + /* If we get here, we can do the renaming. 
*/ + COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]); + + old_reg = this_head->regno; + best_reg = find_best_rename_reg (this_head, super_class, &unavailable, old_reg); + + regrename_do_replace (this_head, best_reg); + + count_unit_reqs (new_reqs, head, PREV_INSN (tail)); + merge_unit_reqs (new_reqs); + if (dump_file) + { + fprintf (dump_file, "reshuffle for insn %d, op_mask %x, " + "original side %d, new reg %d\n", + INSN_UID (insn), op_mask, orig_side, best_reg); + fprintf (dump_file, " imbalance %d -> %d\n", + unit_req_imbalance (reqs), unit_req_imbalance (new_reqs)); + } + if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs)) + regrename_do_replace (this_head, old_reg); + else + memcpy (reqs, new_reqs, sizeof (unit_req_table)); + + out_fail: + involved_chains.release (); +} + +/* Find insns in LOOP which would, if shifted to the other side + of the machine, reduce an imbalance in the unit reservations. */ +static void +reshuffle_units (basic_block loop) +{ + rtx head = BB_HEAD (loop); + rtx tail = BB_END (loop); + rtx insn; + unit_req_table reqs; + edge e; + edge_iterator ei; + bitmap_head bbs; + + count_unit_reqs (reqs, head, PREV_INSN (tail)); + merge_unit_reqs (reqs); + + regrename_init (true); + + bitmap_initialize (&bbs, &bitmap_default_obstack); + + FOR_EACH_EDGE (e, ei, loop->preds) + bitmap_set_bit (&bbs, e->src->index); + + bitmap_set_bit (&bbs, loop->index); + regrename_analyze (&bbs); + + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { + enum attr_units units; + int count, side1, side2, req1, req2; + unsigned int mask1, mask2; + insn_rr_info *info; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + count = get_unit_reqs (insn, &req1, &side1, &req2, &side2); + + if (count == 0) + continue; + + if (!get_unit_operand_masks (insn, &mask1, &mask2)) + continue; + + info = &insn_rr[INSN_UID (insn)]; + if (info->op_info == NULL) + continue; + + if (reqs[side1][req1] > 1 + && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1]) + { + try_rename_operands (head, tail, reqs, insn, info, mask1, side1); + } + + units = get_attr_units (insn); + if (units == UNITS_D_ADDR) + { + gcc_assert (count == 2); + if (reqs[side2][req2] > 1 + && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2]) + { + try_rename_operands (head, tail, reqs, insn, info, mask2, side2); + } + } + } + regrename_finish (); +} + +/* Backend scheduling state. */ +typedef struct c6x_sched_context +{ + /* The current scheduler clock, saved in the sched_reorder hook. */ + int curr_sched_clock; + + /* Number of insns issued so far in this cycle. */ + int issued_this_cycle; + + /* We record the time at which each jump occurs in JUMP_CYCLES. The + theoretical maximum for number of jumps in flight is 12: 2 every + cycle, with a latency of 6 cycles each. This is a circular + buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier + jumps have a higher index. This array should be accessed through + the jump_cycle function. */ + int jump_cycles[12]; + int jump_cycle_index; + + /* In parallel with jump_cycles, this array records the opposite of + the condition used in each pending jump. This is used to + predicate insns that are scheduled in the jump's delay slots. If + this is NULL_RTX no such predication happens. */ + rtx jump_cond[12]; + + /* Similar to the jump_cycles mechanism, but here we take into + account all insns with delay slots, to avoid scheduling asms into + the delay slots. */ + int delays_finished_at; + + /* The following variable value is the last issued insn. 
*/ + rtx last_scheduled_insn; + /* The last issued insn that isn't a shadow of another. */ + rtx last_scheduled_iter0; + + /* The following variable value is DFA state before issuing the + first insn in the current clock cycle. We do not use this member + of the structure directly; we copy the data in and out of + prev_cycle_state. */ + state_t prev_cycle_state_ctx; + + int reg_n_accesses[FIRST_PSEUDO_REGISTER]; + int reg_n_xaccesses[FIRST_PSEUDO_REGISTER]; + int reg_set_in_cycle[FIRST_PSEUDO_REGISTER]; + + int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER]; + int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER]; +} *c6x_sched_context_t; + +/* The current scheduling state. */ +static struct c6x_sched_context ss; + +/* The following variable value is DFA state before issuing the first insn + in the current clock cycle. This is used in c6x_variable_issue for + comparison with the state after issuing the last insn in a cycle. */ +static state_t prev_cycle_state; + +/* Set when we discover while processing an insn that it would lead to too + many accesses of the same register. */ +static bool reg_access_stall; + +/* The highest insn uid after delayed insns were split, but before loop bodies + were copied by the modulo scheduling code. */ +static int sploop_max_uid_iter0; + +/* Look up the jump cycle with index N. For an out-of-bounds N, we return 0, + so the caller does not specifically have to test for it. */ +static int +get_jump_cycle (int n) +{ + if (n >= 12) + return 0; + n += ss.jump_cycle_index; + if (n >= 12) + n -= 12; + return ss.jump_cycles[n]; +} + +/* Look up the jump condition with index N. */ +static rtx +get_jump_cond (int n) +{ + if (n >= 12) + return NULL_RTX; + n += ss.jump_cycle_index; + if (n >= 12) + n -= 12; + return ss.jump_cond[n]; +} + +/* Return the index of the first jump that occurs after CLOCK_VAR. If no jump + has delay slots beyond CLOCK_VAR, return -1. */ +static int +first_jump_index (int clock_var) +{ + int retval = -1; + int n = 0; + for (;;) + { + int t = get_jump_cycle (n); + if (t <= clock_var) + break; + retval = n; + n++; + } + return retval; +} + +/* Add a new entry in our scheduling state for a jump that occurs in CYCLE + and has the opposite condition of COND. */ +static void +record_jump (int cycle, rtx cond) +{ + if (ss.jump_cycle_index == 0) + ss.jump_cycle_index = 11; + else + ss.jump_cycle_index--; + ss.jump_cycles[ss.jump_cycle_index] = cycle; + ss.jump_cond[ss.jump_cycle_index] = cond; +} + +/* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in + new_conditions. */ +static void +insn_set_clock (rtx insn, int cycle) +{ + unsigned uid = INSN_UID (insn); + + if (uid >= INSN_INFO_LENGTH) + insn_info.safe_grow (uid * 5 / 4 + 10); + + INSN_INFO_ENTRY (uid).clock = cycle; + INSN_INFO_ENTRY (uid).new_cond = NULL; + INSN_INFO_ENTRY (uid).reservation = 0; + INSN_INFO_ENTRY (uid).ebb_start = false; +} + +/* Return the clock cycle we set for the insn with uid UID. */ +static int +insn_uid_get_clock (int uid) +{ + return INSN_INFO_ENTRY (uid).clock; +} + +/* Return the clock cycle we set for INSN. */ +static int +insn_get_clock (rtx insn) +{ + return insn_uid_get_clock (INSN_UID (insn)); +} + +/* Examine INSN, and if it is a conditional jump of any kind, return + the opposite of the condition in which it branches. Otherwise, + return NULL_RTX. 
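The jump-tracking state above is a 12-entry circular buffer: the newest jump is recorded at a decremented start index, so index 0 is always the most recent jump and higher indices are older ones, while out-of-range lookups read as cycle 0 so callers need not special-case them. A standalone sketch of just that buffer; the struct and function names are invented and assume a zero-initialized ring and a nonnegative clock:

  struct jump_ring
  {
    int cycles[12];
    int start;
  };

  static void
  record_jump_cycle (struct jump_ring *r, int cycle)
  {
    r->start = (r->start == 0 ? 11 : r->start - 1);
    r->cycles[r->start] = cycle;
  }

  static int
  jump_cycle (const struct jump_ring *r, int n)
  {
    if (n >= 12)
      return 0;
    return r->cycles[(r->start + n) % 12];
  }

  /* Index of the oldest jump whose delay slots extend past CLOCK,
     or -1 if none do.  */
  static int
  first_jump (const struct jump_ring *r, int clock)
  {
    int n, retval = -1;

    for (n = 0; jump_cycle (r, n) > clock; n++)
      retval = n;
    return retval;
  }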
*/ +static rtx +condjump_opposite_condition (rtx insn) +{ + rtx pat = PATTERN (insn); + int icode = INSN_CODE (insn); + rtx x = NULL; + + if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false) + { + x = XEXP (SET_SRC (pat), 0); + if (icode == CODE_FOR_br_false) + return x; + } + if (GET_CODE (pat) == COND_EXEC) + { + rtx t = COND_EXEC_CODE (pat); + if ((GET_CODE (t) == PARALLEL + && GET_CODE (XVECEXP (t, 0, 0)) == RETURN) + || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP) + || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx)) + x = COND_EXEC_TEST (pat); + } + + if (x != NULL_RTX) + { + enum rtx_code code = GET_CODE (x); + x = gen_rtx_fmt_ee (code == EQ ? NE : EQ, + GET_MODE (x), XEXP (x, 0), + XEXP (x, 1)); + } + return x; +} + +/* Return true iff COND1 and COND2 are exactly opposite conditions + one of them NE and the other EQ. */ +static bool +conditions_opposite_p (rtx cond1, rtx cond2) +{ + return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0)) + && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1)) + && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2))); +} + +/* Return true if we can add a predicate COND to INSN, or if INSN + already has that predicate. If DOIT is true, also perform the + modification. */ +static bool +predicate_insn (rtx insn, rtx cond, bool doit) +{ + int icode; + if (cond == NULL_RTX) + { + gcc_assert (!doit); + return false; + } + + if (get_attr_predicable (insn) == PREDICABLE_YES + && GET_CODE (PATTERN (insn)) != COND_EXEC) + { + if (doit) + { + rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn)); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + } + return true; + } + if (GET_CODE (PATTERN (insn)) == COND_EXEC + && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond)) + return true; + icode = INSN_CODE (insn); + if (icode == CODE_FOR_real_jump + || icode == CODE_FOR_jump + || icode == CODE_FOR_indirect_jump) + { + rtx pat = PATTERN (insn); + rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0) + : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0) + : SET_SRC (pat)); + if (doit) + { + rtx newpat; + if (REG_P (dest)) + newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn)); + else + newpat = gen_br_true (cond, XEXP (cond, 0), dest); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + } + return true; + } + if (INSN_CODE (insn) == CODE_FOR_br_true) + { + rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0); + return rtx_equal_p (br_cond, cond); + } + if (INSN_CODE (insn) == CODE_FOR_br_false) + { + rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0); + return conditions_opposite_p (br_cond, cond); + } + return false; +} + +/* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */ +static void +init_sched_state (c6x_sched_context_t sc) +{ + sc->last_scheduled_insn = NULL_RTX; + sc->last_scheduled_iter0 = NULL_RTX; + sc->issued_this_cycle = 0; + memset (sc->jump_cycles, 0, sizeof sc->jump_cycles); + memset (sc->jump_cond, 0, sizeof sc->jump_cond); + sc->jump_cycle_index = 0; + sc->delays_finished_at = 0; + sc->curr_sched_clock = 0; + + sc->prev_cycle_state_ctx = xmalloc (dfa_state_size); + + memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses); + memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses); + memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle); + + state_reset (sc->prev_cycle_state_ctx); +} + +/* Allocate store for new scheduling context. 
*/ +static void * +c6x_alloc_sched_context (void) +{ + return xmalloc (sizeof (struct c6x_sched_context)); +} + +/* If CLEAN_P is true then initializes _SC with clean data, + and from the global context otherwise. */ +static void +c6x_init_sched_context (void *_sc, bool clean_p) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + + if (clean_p) + { + init_sched_state (sc); + } + else + { + *sc = ss; + sc->prev_cycle_state_ctx = xmalloc (dfa_state_size); + memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size); + } +} + +/* Sets the global scheduling context to the one pointed to by _SC. */ +static void +c6x_set_sched_context (void *_sc) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + + gcc_assert (sc != NULL); + ss = *sc; + memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size); +} + +/* Clear data in _SC. */ +static void +c6x_clear_sched_context (void *_sc) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + gcc_assert (_sc != NULL); + + free (sc->prev_cycle_state_ctx); +} + +/* Free _SC. */ +static void +c6x_free_sched_context (void *_sc) +{ + free (_sc); +} + +/* True if we are currently performing a preliminary scheduling + pass before modulo scheduling; we can't allow the scheduler to + modify instruction patterns using packetization assumptions, + since there will be another scheduling pass later if modulo + scheduling fails. */ +static bool in_hwloop; + +/* Provide information about speculation capabilities, and set the + DO_BACKTRACKING flag. */ +static void +c6x_set_sched_flags (spec_info_t spec_info) +{ + unsigned int *flags = &(current_sched_info->flags); + + if (*flags & SCHED_EBB) + { + *flags |= DO_BACKTRACKING | DO_PREDICATION; + } + if (in_hwloop) + *flags |= DONT_BREAK_DEPENDENCIES; + + spec_info->mask = 0; +} + +/* Implement the TARGET_SCHED_ISSUE_RATE hook. */ + +static int +c6x_issue_rate (void) +{ + return 8; +} + +/* Used together with the collapse_ndfa option, this ensures that we reach a + deterministic automaton state before trying to advance a cycle. + With collapse_ndfa, genautomata creates advance cycle arcs only for + such deterministic states. */ + +static rtx +c6x_sched_dfa_pre_cycle_insn (void) +{ + return const0_rtx; +} + +/* We're beginning a new block. Initialize data structures as necessary. */ + +static void +c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + if (prev_cycle_state == NULL) + { + prev_cycle_state = xmalloc (dfa_state_size); + } + init_sched_state (&ss); + state_reset (prev_cycle_state); +} + +/* We are about to being issuing INSN. Return nonzero if we cannot + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. + For C6X, we use this function just to copy the previous DFA state + for comparison purposes. 
*/ + +static int +c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + rtx insn ATTRIBUTE_UNUSED, int last_clock ATTRIBUTE_UNUSED, + int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED) +{ + if (clock != last_clock) + memcpy (prev_cycle_state, curr_state, dfa_state_size); + return 0; +} + +static void +c6x_mark_regno_read (int regno, bool cross) +{ + int t = ++ss.tmp_reg_n_accesses[regno]; + + if (t > 4) + reg_access_stall = true; + + if (cross) + { + int set_cycle = ss.reg_set_in_cycle[regno]; + /* This must be done in this way rather than by tweaking things in + adjust_cost, since the stall occurs even for insns with opposite + predicates, and the scheduler may not even see a dependency. */ + if (set_cycle > 0 && set_cycle == ss.curr_sched_clock) + reg_access_stall = true; + /* This doesn't quite do anything yet as we're only modeling one + x unit. */ + ++ss.tmp_reg_n_xaccesses[regno]; + } +} + +/* Note that REG is read in the insn being examined. If CROSS, it + means the access is through a cross path. Update the temporary reg + access arrays, and set REG_ACCESS_STALL if the insn can't be issued + in the current cycle. */ + +static void +c6x_mark_reg_read (rtx reg, bool cross) +{ + unsigned regno = REGNO (reg); + unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)]; + + while (nregs-- > 0) + c6x_mark_regno_read (regno + nregs, cross); +} + +/* Note that register REG is written in cycle CYCLES. */ + +static void +c6x_mark_reg_written (rtx reg, int cycles) +{ + unsigned regno = REGNO (reg); + unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)]; + + while (nregs-- > 0) + ss.reg_set_in_cycle[regno + nregs] = cycles; +} + +/* Update the register state information for an instruction whose + body is X. Return true if the instruction has to be delayed until the + next cycle. 
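The read-tracking code above enforces two limits: at most four reads of any one register per cycle, and no cross-path read of a value written in the same cycle. A standalone sketch of that bookkeeping, with a hypothetical register count standing in for FIRST_PSEUDO_REGISTER and invented names:

  #include <stdbool.h>

  #define N_HARD_REGS 64   /* hypothetical count, for illustration only */

  static int reads_this_cycle[N_HARD_REGS];
  static int set_in_cycle[N_HARD_REGS];
  static bool access_stall;

  /* Note a read of REGNO at cycle CLOCK; CROSS means the read goes through
     a cross path.  Sets access_stall if the insn must wait a cycle.  */
  static void
  mark_regno_read (int regno, bool cross, int clock)
  {
    if (++reads_this_cycle[regno] > 4)
      access_stall = true;
    if (cross && set_in_cycle[regno] > 0 && set_in_cycle[regno] == clock)
      access_stall = true;
  }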
*/ + +static bool +c6x_registers_update (rtx insn) +{ + enum attr_cross cross; + enum attr_dest_regfile destrf; + int i, nops; + rtx x; + + if (!reload_completed || recog_memoized (insn) < 0) + return false; + + reg_access_stall = false; + memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses, + sizeof ss.tmp_reg_n_accesses); + memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses, + sizeof ss.tmp_reg_n_xaccesses); + + extract_insn (insn); + + cross = get_attr_cross (insn); + destrf = get_attr_dest_regfile (insn); + + nops = recog_data.n_operands; + x = PATTERN (insn); + if (GET_CODE (x) == COND_EXEC) + { + c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false); + nops -= 2; + } + + for (i = 0; i < nops; i++) + { + rtx op = recog_data.operand[i]; + if (recog_data.operand_type[i] == OP_OUT) + continue; + if (REG_P (op)) + { + bool this_cross = cross; + if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op))) + this_cross = false; + if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op))) + this_cross = false; + c6x_mark_reg_read (op, this_cross); + } + else if (MEM_P (op)) + { + op = XEXP (op, 0); + switch (GET_CODE (op)) + { + case POST_INC: + case PRE_INC: + case POST_DEC: + case PRE_DEC: + op = XEXP (op, 0); + /* fall through */ + case REG: + c6x_mark_reg_read (op, false); + break; + case POST_MODIFY: + case PRE_MODIFY: + op = XEXP (op, 1); + gcc_assert (GET_CODE (op) == PLUS); + /* fall through */ + case PLUS: + c6x_mark_reg_read (XEXP (op, 0), false); + if (REG_P (XEXP (op, 1))) + c6x_mark_reg_read (XEXP (op, 1), false); + break; + case SYMBOL_REF: + case LABEL_REF: + case CONST: + c6x_mark_regno_read (REG_B14, false); + break; + default: + gcc_unreachable (); + } + } + else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0) + gcc_unreachable (); + } + return reg_access_stall; +} + +/* Helper function for the TARGET_SCHED_REORDER and + TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe + in the current cycle, move it down in the ready list and return the + number of non-unsafe insns. */ + +static int +c6x_sched_reorder_1 (rtx *ready, int *pn_ready, int clock_var) +{ + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; + int first_jump; + + /* Keep track of conflicts due to a limit number of register accesses, + and due to stalls incurred by too early accesses of registers using + cross paths. */ + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + int icode = recog_memoized (insn); + bool is_asm = (icode < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)); + bool no_parallel = (is_asm || icode == CODE_FOR_sploop + || (icode >= 0 + && get_attr_type (insn) == TYPE_ATOMIC)); + + /* We delay asm insns until all delay slots are exhausted. We can't + accurately tell how many cycles an asm takes, and the main scheduling + code always assumes at least 1 cycle, which may be wrong. */ + if ((no_parallel + && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at)) + || c6x_registers_update (insn) + || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + else if (shadow_p (insn)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + } + } + + /* Ensure that no other jump is scheduled in jump delay slots, since + it would put the machine into the wrong state. 
Also, we must + avoid scheduling insns that have a latency longer than the + remaining jump delay slots, as the code at the jump destination + won't be prepared for it. + + However, we can relax this condition somewhat. The rest of the + scheduler will automatically avoid scheduling an insn on which + the jump shadow depends so late that its side effect happens + after the jump. This means that if we see an insn with a longer + latency here, it can safely be scheduled if we can ensure that it + has a predicate opposite of the previous jump: the side effect + will happen in what we think of as the same basic block. In + c6x_variable_issue, we will record the necessary predicate in + new_conditions, and after scheduling is finished, we will modify + the insn. + + Special care must be taken whenever there is more than one jump + in flight. */ + + first_jump = first_jump_index (clock_var); + if (first_jump != -1) + { + int first_cycle = get_jump_cycle (first_jump); + rtx first_cond = get_jump_cond (first_jump); + int second_cycle = 0; + + if (first_jump > 0) + second_cycle = get_jump_cycle (first_jump - 1); + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + int icode = recog_memoized (insn); + bool is_asm = (icode < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)); + int this_cycles, rsrv_cycles; + enum attr_type type; + + gcc_assert (!is_asm); + if (icode < 0) + continue; + this_cycles = get_attr_cycles (insn); + rsrv_cycles = get_attr_reserve_cycles (insn); + type = get_attr_type (insn); + /* Treat branches specially; there is also a hazard if two jumps + end at the same cycle. */ + if (type == TYPE_BRANCH || type == TYPE_CALL) + this_cycles++; + if (clock_var + this_cycles <= first_cycle) + continue; + if ((first_jump > 0 && clock_var + this_cycles > second_cycle) + || clock_var + rsrv_cycles > first_cycle + || !predicate_insn (insn, first_cond, false)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + } + } + + return n_ready; +} + +/* Implement the TARGET_SCHED_REORDER hook. We save the current clock + for later and clear the register access information for the new + cycle. We also move asm statements out of the way if they would be + scheduled in a delay slot. */ + +static int +c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx *ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, int clock_var) +{ + ss.curr_sched_clock = clock_var; + ss.issued_this_cycle = 0; + memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses); + memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses); + + if (ready == NULL) + return 0; + + return c6x_sched_reorder_1 (ready, pn_ready, clock_var); +} + +/* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock + cycle for every insn. */ + +static int +c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx *ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, int clock_var) +{ + /* FIXME: the assembler rejects labels inside an execute packet. + This can occur if prologue insns are scheduled in parallel with + others, so we avoid this here. Also make sure that nothing is + scheduled in parallel with a TYPE_ATOMIC insn or after a jump. 
*/ + if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn) + || JUMP_P (ss.last_scheduled_insn) + || (recog_memoized (ss.last_scheduled_insn) >= 0 + && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC)) + { + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + if (!shadow_p (insn)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + } + return n_ready; + } + + return c6x_sched_reorder_1 (ready, pn_ready, clock_var); +} + +/* Subroutine of maybe_clobber_cond, called through note_stores. */ + +static void +clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1) +{ + rtx *cond = (rtx *)data1; + if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond)) + *cond = NULL_RTX; +} + +/* Examine INSN, and if it destroys the conditions have recorded for + any of the jumps in flight, clear that condition so that we don't + predicate any more insns. CLOCK_VAR helps us limit the search to + only those jumps which are still in flight. */ + +static void +maybe_clobber_cond (rtx insn, int clock_var) +{ + int n, idx; + idx = ss.jump_cycle_index; + for (n = 0; n < 12; n++, idx++) + { + rtx cond, link; + int cycle; + + if (idx >= 12) + idx -= 12; + cycle = ss.jump_cycles[idx]; + if (cycle <= clock_var) + return; + + cond = ss.jump_cond[idx]; + if (cond == NULL_RTX) + continue; + + if (CALL_P (insn)) + { + ss.jump_cond[idx] = NULL_RTX; + continue; + } + + note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx); + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_INC) + clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx); + } +} + +/* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to + issue INSN. Return the number of insns left on the ready queue + that can be issued this cycle. + We use this hook to record clock cycles and reservations for every insn. 
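maybe_clobber_cond above stops predicating delay-slot insns once the predicate register of an in-flight jump is overwritten, or a call intervenes. A much-simplified standalone sketch in which each pending condition is reduced to the number of the predicate register it tests; the names are invented and the REG_INC handling is omitted:

  #define MAX_JUMPS 12

  static int pending_cond_reg[MAX_JUMPS];    /* -1 if no usable condition */
  static int pending_jump_cycle[MAX_JUMPS];

  /* An insn at cycle CLOCK writes WRITTEN_REGNO (or is a call): any jump
     still in flight whose condition depends on that register can no longer
     have insns predicated into its delay slots.  */
  static void
  clobber_pending_conds (int written_regno, int is_call, int clock)
  {
    int i;

    for (i = 0; i < MAX_JUMPS; i++)
      {
        if (pending_jump_cycle[i] <= clock)
          continue;                       /* jump already completed */
        if (is_call || pending_cond_reg[i] == written_regno)
          pending_cond_reg[i] = -1;
      }
  }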
*/ + +static int +c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx insn, int can_issue_more ATTRIBUTE_UNUSED) +{ + ss.last_scheduled_insn = insn; + if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn)) + ss.last_scheduled_iter0 = insn; + if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER) + ss.issued_this_cycle++; + if (insn_info.exists ()) + { + state_t st_after = alloca (dfa_state_size); + int curr_clock = ss.curr_sched_clock; + int uid = INSN_UID (insn); + int icode = recog_memoized (insn); + rtx first_cond; + int first, first_cycle; + unsigned int mask; + int i; + + insn_set_clock (insn, curr_clock); + INSN_INFO_ENTRY (uid).ebb_start + = curr_clock == 0 && ss.issued_this_cycle == 1; + + first = first_jump_index (ss.curr_sched_clock); + if (first == -1) + { + first_cycle = 0; + first_cond = NULL_RTX; + } + else + { + first_cycle = get_jump_cycle (first); + first_cond = get_jump_cond (first); + } + if (icode >= 0 + && first_cycle > curr_clock + && first_cond != NULL_RTX + && (curr_clock + get_attr_cycles (insn) > first_cycle + || get_attr_type (insn) == TYPE_BRANCH + || get_attr_type (insn) == TYPE_CALL)) + INSN_INFO_ENTRY (uid).new_cond = first_cond; + + memcpy (st_after, curr_state, dfa_state_size); + state_transition (st_after, const0_rtx); + + mask = 0; + for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++) + if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i]) + && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i])) + mask |= 1 << i; + INSN_INFO_ENTRY (uid).unit_mask = mask; + + maybe_clobber_cond (insn, curr_clock); + + if (icode >= 0) + { + int i, cycles; + + c6x_registers_update (insn); + memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses, + sizeof ss.reg_n_accesses); + memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_accesses, + sizeof ss.reg_n_xaccesses); + + cycles = get_attr_cycles (insn); + if (ss.delays_finished_at < ss.curr_sched_clock + cycles) + ss.delays_finished_at = ss.curr_sched_clock + cycles; + if (get_attr_type (insn) == TYPE_BRANCH + || get_attr_type (insn) == TYPE_CALL) + { + rtx opposite = condjump_opposite_condition (insn); + record_jump (ss.curr_sched_clock + cycles, opposite); + } + + /* Mark the cycles in which the destination registers are written. + This is used for calculating stalls when using cross units. */ + extract_insn (insn); + /* Cross-path stalls don't apply to results of load insns. */ + if (get_attr_type (insn) == TYPE_LOAD + || get_attr_type (insn) == TYPE_LOADN + || get_attr_type (insn) == TYPE_LOAD_SHADOW) + cycles--; + for (i = 0; i < recog_data.n_operands; i++) + { + rtx op = recog_data.operand[i]; + if (MEM_P (op)) + { + rtx addr = XEXP (op, 0); + if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) + c6x_mark_reg_written (XEXP (addr, 0), + insn_uid_get_clock (uid) + 1); + } + if (recog_data.operand_type[i] != OP_IN + && REG_P (op)) + { + c6x_mark_reg_written (op, + insn_uid_get_clock (uid) + cycles); + } + } + } + } + return can_issue_more; +} + +/* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for + anti- and output dependencies. 
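+   Output dependencies on multi-cycle insns return most of the producer's
+   latency (less the jump-shadow bonus), while a true dependency that only
+   uses a load's autoincremented address costs a single cycle.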
*/ + +static int +c6x_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN; + int dep_insn_code_number, insn_code_number; + int shadow_bonus = 0; + enum reg_note kind; + dep_insn_code_number = recog_memoized (dep_insn); + insn_code_number = recog_memoized (insn); + + if (dep_insn_code_number >= 0) + dep_insn_type = get_attr_type (dep_insn); + + if (insn_code_number >= 0) + insn_type = get_attr_type (insn); + + kind = REG_NOTE_KIND (link); + if (kind == 0) + { + /* If we have a dependency on a load, and it's not for the result of + the load, it must be for an autoincrement. Reduce the cost in that + case. */ + if (dep_insn_type == TYPE_LOAD) + { + rtx set = PATTERN (dep_insn); + if (GET_CODE (set) == COND_EXEC) + set = COND_EXEC_CODE (set); + if (GET_CODE (set) == UNSPEC) + cost = 1; + else + { + gcc_assert (GET_CODE (set) == SET); + if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn))) + cost = 1; + } + } + } + + /* A jump shadow needs to have its latency decreased by one. Conceptually, + it occurs in between two cycles, but we schedule it at the end of the + first cycle. */ + if (shadow_type_p (insn_type)) + shadow_bonus = 1; + + /* Anti and output dependencies usually have zero cost, but we want + to insert a stall after a jump, and after certain floating point + insns that take more than one cycle to read their inputs. In the + future, we should try to find a better algorithm for scheduling + jumps. */ + if (kind != 0) + { + /* We can get anti-dependencies against shadow insns. Treat these + like output dependencies, so that the insn is entirely finished + before the branch takes place. */ + if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW) + kind = REG_DEP_OUTPUT; + switch (dep_insn_type) + { + case TYPE_CALLP: + return 1; + case TYPE_BRANCH: + case TYPE_CALL: + if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y) + /* This is a real_jump/real_call insn. These don't have + outputs, and ensuring the validity of scheduling things + in the delay slot is the job of + c6x_sched_reorder_1. */ + return 0; + /* Unsplit calls can happen - e.g. for divide insns. */ + return 6; + case TYPE_LOAD: + case TYPE_LOADN: + case TYPE_INTDP: + if (kind == REG_DEP_OUTPUT) + return 5 - shadow_bonus; + return 0; + case TYPE_MPY4: + case TYPE_FP4: + if (kind == REG_DEP_OUTPUT) + return 4 - shadow_bonus; + return 0; + case TYPE_MPY2: + if (kind == REG_DEP_OUTPUT) + return 2 - shadow_bonus; + return 0; + case TYPE_CMPDP: + if (kind == REG_DEP_OUTPUT) + return 2 - shadow_bonus; + return 2; + case TYPE_ADDDP: + case TYPE_MPYSPDP: + if (kind == REG_DEP_OUTPUT) + return 7 - shadow_bonus; + return 2; + case TYPE_MPYSP2DP: + if (kind == REG_DEP_OUTPUT) + return 5 - shadow_bonus; + return 2; + case TYPE_MPYI: + if (kind == REG_DEP_OUTPUT) + return 9 - shadow_bonus; + return 4; + case TYPE_MPYID: + case TYPE_MPYDP: + if (kind == REG_DEP_OUTPUT) + return 10 - shadow_bonus; + return 4; + + default: + if (insn_type == TYPE_SPKERNEL) + return 0; + if (kind == REG_DEP_OUTPUT) + return 1 - shadow_bonus; + + return 0; + } + } + + return cost - shadow_bonus; +} + +/* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there + are N_FILLED. REAL_FIRST identifies the slot if the insn that appears + first in the original stream. 
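+   The new SEQUENCE is spliced into the insn chain at that insn's position.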
*/ + +static void +gen_one_bundle (rtx *slot, int n_filled, int real_first) +{ + rtx bundle; + rtx t; + int i; + + bundle = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot)); + bundle = make_insn_raw (bundle); + BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]); + INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]); + PREV_INSN (bundle) = PREV_INSN (slot[real_first]); + + t = NULL_RTX; + + for (i = 0; i < n_filled; i++) + { + rtx insn = slot[i]; + remove_insn (insn); + PREV_INSN (insn) = t ? t : PREV_INSN (bundle); + if (t != NULL_RTX) + NEXT_INSN (t) = insn; + t = insn; + if (i > 0) + INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle); + } + + NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle)); + NEXT_INSN (t) = NEXT_INSN (bundle); + NEXT_INSN (PREV_INSN (bundle)) = bundle; + PREV_INSN (NEXT_INSN (bundle)) = bundle; +} + +/* Move all parallel instructions into SEQUENCEs, so that no subsequent passes + try to insert labels in the middle. */ + +static void +c6x_gen_bundles (void) +{ + basic_block bb; + rtx insn, next, last_call; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + /* The machine is eight insns wide. We can have up to six shadow + insns, plus an extra slot for merging the jump shadow. */ + rtx slot[15]; + int n_filled = 0; + int first_slot = 0; + + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + rtx delete_this = NULL_RTX; + + if (NONDEBUG_INSN_P (insn)) + { + /* Put calls at the start of the sequence. */ + if (CALL_P (insn)) + { + first_slot++; + if (n_filled) + { + memmove (&slot[1], &slot[0], + n_filled * sizeof (slot[0])); + } + if (!shadow_p (insn)) + { + PUT_MODE (insn, TImode); + if (n_filled) + PUT_MODE (slot[1], VOIDmode); + } + n_filled++; + slot[0] = insn; + } + else + { + slot[n_filled++] = insn; + } + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(NONDEBUG_INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + at_end = insn == BB_END (bb); + if (delete_this == NULL_RTX + && (at_end || (GET_MODE (next) == TImode + && !(shadow_p (next) && CALL_P (next))))) + { + if (n_filled >= 2) + gen_one_bundle (slot, n_filled, first_slot); + + n_filled = 0; + first_slot = 0; + } + if (at_end) + break; + } + } + /* Bundling, and emitting nops, can separate + NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix + that up here. */ + last_call = NULL_RTX; + for (insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + if (CALL_P (insn) + || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE + && CALL_P (XVECEXP (PATTERN (insn), 0, 0)))) + last_call = insn; + if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION) + continue; + if (NEXT_INSN (last_call) == insn) + continue; + NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn); + PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn); + PREV_INSN (insn) = last_call; + NEXT_INSN (insn) = NEXT_INSN (last_call); + PREV_INSN (NEXT_INSN (insn)) = insn; + NEXT_INSN (PREV_INSN (insn)) = insn; + last_call = insn; + } +} + +/* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */ + +static rtx +emit_nop_after (int cycles, rtx after) +{ + rtx insn; + + /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path + operation. We don't need the extra NOP since in this case, the hardware + will automatically insert the required stall. 
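+   Hence a request for 10 cycles is silently reduced to a 9 cycle NOP.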
*/ + if (cycles == 10) + cycles--; + + gcc_assert (cycles < 10); + + insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after); + PUT_MODE (insn, TImode); + + return insn; +} + +/* Determine whether INSN is a call that needs to have a return label + placed. */ + +static bool +returning_call_p (rtx insn) +{ + if (CALL_P (insn)) + return (!SIBLING_CALL_P (insn) + && get_attr_type (insn) != TYPE_CALLP + && get_attr_type (insn) != TYPE_SHADOW); + if (recog_memoized (insn) < 0) + return false; + if (get_attr_type (insn) == TYPE_CALL) + return true; + return false; +} + +/* Determine whether INSN's pattern can be converted to use callp. */ +static bool +can_use_callp (rtx insn) +{ + int icode = recog_memoized (insn); + if (!TARGET_INSNS_64PLUS + || icode < 0 + || GET_CODE (PATTERN (insn)) == COND_EXEC) + return false; + + return ((icode == CODE_FOR_real_call + || icode == CODE_FOR_call_internal + || icode == CODE_FOR_call_value_internal) + && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY); +} + +/* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */ +static void +convert_to_callp (rtx insn) +{ + rtx lab; + extract_insn (insn); + if (GET_CODE (PATTERN (insn)) == SET) + { + rtx dest = recog_data.operand[0]; + lab = recog_data.operand[1]; + PATTERN (insn) = gen_callp_value (dest, lab); + INSN_CODE (insn) = CODE_FOR_callp_value; + } + else + { + lab = recog_data.operand[0]; + PATTERN (insn) = gen_callp (lab); + INSN_CODE (insn) = CODE_FOR_callp; + } +} + +/* Scan forwards from INSN until we find the next insn that has mode TImode + (indicating it starts a new cycle), and occurs in cycle CLOCK. + Return it if we find such an insn, NULL_RTX otherwise. */ +static rtx +find_next_cycle_insn (rtx insn, int clock) +{ + rtx t = insn; + if (GET_MODE (t) == TImode) + t = next_real_insn (t); + while (t && GET_MODE (t) != TImode) + t = next_real_insn (t); + + if (t && insn_get_clock (t) == clock) + return t; + return NULL_RTX; +} + +/* If COND_INSN has a COND_EXEC condition, wrap the same condition + around PAT. Return PAT either unchanged or modified in this + way. */ +static rtx +duplicate_cond (rtx pat, rtx cond_insn) +{ + rtx cond_pat = PATTERN (cond_insn); + if (GET_CODE (cond_pat) == COND_EXEC) + pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)), + pat); + return pat; +} + +/* Walk forward from INSN to find the last insn that issues in the same clock + cycle. */ +static rtx +find_last_same_clock (rtx insn) +{ + rtx retval = insn; + rtx t = next_real_insn (insn); + + while (t && GET_MODE (t) != TImode) + { + if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0) + retval = t; + t = next_real_insn (t); + } + return retval; +} + +/* For every call insn in the function, emit code to load the return + address. For each call we create a return label and store it in + CALL_LABELS. If are not scheduling, we emit the labels here, + otherwise the caller will do it later. + This function is called after final insn scheduling, but before creating + the SEQUENCEs that represent execute packets. 
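+   The return-address load is either folded into a CALLP, bundled directly
+   with the call when we are not scheduling, or placed into the .S2 slots
+   reserved during the call's delay cycles when we are.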
*/ + +static void +reorg_split_calls (rtx *call_labels) +{ + unsigned int reservation_mask = 0; + rtx insn = get_insns (); + gcc_assert (NOTE_P (insn)); + insn = next_real_insn (insn); + while (insn) + { + int uid; + rtx next = next_real_insn (insn); + + if (DEBUG_INSN_P (insn)) + goto done; + + if (GET_MODE (insn) == TImode) + reservation_mask = 0; + uid = INSN_UID (insn); + if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0) + reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation; + + if (returning_call_p (insn)) + { + rtx label = gen_label_rtx (); + rtx labelref = gen_rtx_LABEL_REF (Pmode, label); + rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO); + + LABEL_NUSES (label) = 2; + if (!c6x_flag_schedule_insns2) + { + if (can_use_callp (insn)) + convert_to_callp (insn); + else + { + rtx t; + rtx slot[4]; + emit_label_after (label, insn); + + /* Bundle the call and its delay slots into a single + SEQUENCE. While these do not issue in parallel + we need to group them into a single EH region. */ + slot[0] = insn; + PUT_MODE (insn, TImode); + if (TARGET_INSNS_64) + { + t = gen_addkpc (reg, labelref, GEN_INT (4)); + slot[1] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[1], TImode); + gen_one_bundle (slot, 2, 0); + } + else + { + slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)), + insn); + PUT_MODE (slot[3], TImode); + t = gen_movsi_lo_sum (reg, reg, labelref); + slot[2] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[2], TImode); + t = gen_movsi_high (reg, labelref); + slot[1] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[1], TImode); + gen_one_bundle (slot, 4, 0); + } + } + } + else + { + /* If we scheduled, we reserved the .S2 unit for one or two + cycles after the call. Emit the insns in these slots, + unless it's possible to create a CALLP insn. + Note that this works because the dependencies ensure that + no insn setting/using B3 is scheduled in the delay slots of + a call. */ + int this_clock = insn_get_clock (insn); + rtx last_same_clock; + rtx after1; + + call_labels[INSN_UID (insn)] = label; + + last_same_clock = find_last_same_clock (insn); + + if (can_use_callp (insn)) + { + /* Find the first insn of the next execute packet. If it + is the shadow insn corresponding to this call, we may + use a CALLP insn. 
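+                    The shadow must be scheduled exactly five cycles after
+                    the call, which is the delay a CALLP covers implicitly
+                    (cf. prev_implicit_nops in reorg_emit_nops).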
*/ + rtx shadow = next_nonnote_nondebug_insn (last_same_clock); + + if (CALL_P (shadow) + && insn_get_clock (shadow) == this_clock + 5) + { + convert_to_callp (shadow); + insn_set_clock (shadow, this_clock); + INSN_INFO_ENTRY (INSN_UID (shadow)).reservation + = RESERVATION_S2; + INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask; + if (GET_MODE (insn) == TImode) + { + rtx new_cycle_first = NEXT_INSN (insn); + while (!NONDEBUG_INSN_P (new_cycle_first) + || GET_CODE (PATTERN (new_cycle_first)) == USE + || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER) + new_cycle_first = NEXT_INSN (new_cycle_first); + PUT_MODE (new_cycle_first, TImode); + if (new_cycle_first != shadow) + PUT_MODE (shadow, VOIDmode); + INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start + = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start; + } + else + PUT_MODE (shadow, VOIDmode); + delete_insn (insn); + goto done; + } + } + after1 = find_next_cycle_insn (last_same_clock, this_clock + 1); + if (after1 == NULL_RTX) + after1 = last_same_clock; + else + after1 = find_last_same_clock (after1); + if (TARGET_INSNS_64) + { + rtx x1 = gen_addkpc (reg, labelref, const0_rtx); + x1 = emit_insn_after (duplicate_cond (x1, insn), after1); + insn_set_clock (x1, this_clock + 1); + INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2; + if (after1 == last_same_clock) + PUT_MODE (x1, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask; + } + else + { + rtx x1, x2; + rtx after2 = find_next_cycle_insn (after1, this_clock + 2); + if (after2 == NULL_RTX) + after2 = after1; + x2 = gen_movsi_lo_sum (reg, reg, labelref); + x2 = emit_insn_after (duplicate_cond (x2, insn), after2); + x1 = gen_movsi_high (reg, labelref); + x1 = emit_insn_after (duplicate_cond (x1, insn), after1); + insn_set_clock (x1, this_clock + 1); + insn_set_clock (x2, this_clock + 2); + INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2; + INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2; + if (after1 == last_same_clock) + PUT_MODE (x1, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask; + if (after1 == after2) + PUT_MODE (x2, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask; + } + } + } + done: + insn = next; + } +} + +/* Called as part of c6x_reorg. This function emits multi-cycle NOP + insns as required for correctness. CALL_LABELS is the array that + holds the return labels for call insns; we emit these here if + scheduling was run earlier. */ + +static void +reorg_emit_nops (rtx *call_labels) +{ + bool first; + rtx prev, last_call; + int prev_clock, earliest_bb_end; + int prev_implicit_nops; + rtx insn = get_insns (); + + /* We look at one insn (or bundle inside a sequence) in each iteration, storing + its issue time in PREV_CLOCK for the next iteration. If there is a gap in + clocks, we must insert a NOP. + EARLIEST_BB_END tracks in which cycle all insns that have been issued in the + current basic block will finish. We must not allow the next basic block to + begin before this cycle. + PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains + a multi-cycle nop. The code is scheduled such that subsequent insns will + show the cycle gap, but we needn't insert a real NOP instruction. 
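+     (CALLP is such an insn; it implicitly provides five cycles of delay.)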
*/ + insn = next_real_insn (insn); + last_call = prev = NULL_RTX; + prev_clock = -1; + earliest_bb_end = 0; + prev_implicit_nops = 0; + first = true; + while (insn) + { + int this_clock = -1; + rtx next; + int max_cycles = 0; + + next = next_real_insn (insn); + + if (DEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || shadow_or_blockage_p (insn) + || JUMP_TABLE_DATA_P (insn)) + goto next_insn; + + if (!c6x_flag_schedule_insns2) + /* No scheduling; ensure that no parallel issue happens. */ + PUT_MODE (insn, TImode); + else + { + int cycles; + + this_clock = insn_get_clock (insn); + if (this_clock != prev_clock) + { + PUT_MODE (insn, TImode); + + if (!first) + { + cycles = this_clock - prev_clock; + + cycles -= prev_implicit_nops; + if (cycles > 1) + { + rtx nop = emit_nop_after (cycles - 1, prev); + insn_set_clock (nop, prev_clock + prev_implicit_nops + 1); + } + } + prev_clock = this_clock; + + if (last_call + && insn_get_clock (last_call) + 6 <= this_clock) + { + emit_label_before (call_labels[INSN_UID (last_call)], insn); + last_call = NULL_RTX; + } + prev_implicit_nops = 0; + } + } + + /* Examine how many cycles the current insn takes, and adjust + LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */ + if (recog_memoized (insn) >= 0 + /* If not scheduling, we've emitted NOPs after calls already. */ + && (c6x_flag_schedule_insns2 || !returning_call_p (insn))) + { + max_cycles = get_attr_cycles (insn); + if (get_attr_type (insn) == TYPE_CALLP) + prev_implicit_nops = 5; + } + else + max_cycles = 1; + if (returning_call_p (insn)) + last_call = insn; + + if (c6x_flag_schedule_insns2) + { + gcc_assert (this_clock >= 0); + if (earliest_bb_end < this_clock + max_cycles) + earliest_bb_end = this_clock + max_cycles; + } + else if (max_cycles > 1) + emit_nop_after (max_cycles - 1, insn); + + prev = insn; + first = false; + + next_insn: + if (c6x_flag_schedule_insns2 + && (next == NULL_RTX + || (GET_MODE (next) == TImode + && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start)) + && earliest_bb_end > 0) + { + int cycles = earliest_bb_end - prev_clock; + if (cycles > 1) + { + prev = emit_nop_after (cycles - 1, prev); + insn_set_clock (prev, prev_clock + prev_implicit_nops + 1); + } + earliest_bb_end = 0; + prev_clock = -1; + first = true; + + if (last_call) + emit_label_after (call_labels[INSN_UID (last_call)], prev); + last_call = NULL_RTX; + } + insn = next; + } +} + +/* If possible, split INSN, which we know is either a jump or a call, into a real + insn and its shadow. 
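+   The real part is emitted just before INSN, INSN itself is rewritten into
+   the shadow pattern, and the pair is recorded with a distance of five
+   cycles.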
*/ +static void +split_delayed_branch (rtx insn) +{ + int code = recog_memoized (insn); + rtx i1, newpat; + rtx pat = PATTERN (insn); + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (CALL_P (insn)) + { + rtx src = pat, dest = NULL_RTX; + rtx callee; + if (GET_CODE (pat) == SET) + { + dest = SET_DEST (pat); + src = SET_SRC (pat); + } + callee = XEXP (XEXP (src, 0), 0); + if (SIBLING_CALL_P (insn)) + { + if (REG_P (callee)) + newpat = gen_indirect_sibcall_shadow (); + else + newpat = gen_sibcall_shadow (callee); + pat = gen_real_jump (callee); + } + else if (dest != NULL_RTX) + { + if (REG_P (callee)) + newpat = gen_indirect_call_value_shadow (dest); + else + newpat = gen_call_value_shadow (dest, callee); + pat = gen_real_call (callee); + } + else + { + if (REG_P (callee)) + newpat = gen_indirect_call_shadow (); + else + newpat = gen_call_shadow (callee); + pat = gen_real_call (callee); + } + pat = duplicate_cond (pat, insn); + newpat = duplicate_cond (newpat, insn); + } + else + { + rtx src, op; + if (GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN) + { + newpat = gen_return_shadow (); + pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0)); + newpat = duplicate_cond (newpat, insn); + } + else + switch (code) + { + case CODE_FOR_br_true: + case CODE_FOR_br_false: + src = SET_SRC (pat); + op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2); + newpat = gen_condjump_shadow (op); + pat = gen_real_jump (op); + if (code == CODE_FOR_br_true) + pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat); + else + pat = gen_rtx_COND_EXEC (VOIDmode, + reversed_comparison (XEXP (src, 0), + VOIDmode), + pat); + break; + + case CODE_FOR_jump: + op = SET_SRC (pat); + newpat = gen_jump_shadow (op); + break; + + case CODE_FOR_indirect_jump: + newpat = gen_indirect_jump_shadow (); + break; + + case CODE_FOR_return_internal: + newpat = gen_return_shadow (); + pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0)); + break; + + default: + return; + } + } + i1 = emit_insn_before (pat, insn); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + record_delay_slot_pair (i1, insn, 5, 0); +} + +/* If INSN is a multi-cycle insn that should be handled properly in + modulo-scheduling, split it into a real insn and a shadow. + Return true if we made a change. + + It is valid for us to fail to split an insn; the caller has to deal + with the possibility. Currently we handle loads and most mpy2 and + mpy4 insns. */ +static bool +split_delayed_nonbranch (rtx insn) +{ + int code = recog_memoized (insn); + enum attr_type type; + rtx i1, newpat, src, dest; + rtx pat = PATTERN (insn); + rtvec rtv; + int delay; + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (code < 0 || GET_CODE (pat) != SET) + return false; + src = SET_SRC (pat); + dest = SET_DEST (pat); + if (!REG_P (dest)) + return false; + + type = get_attr_type (insn); + if (code >= 0 + && (type == TYPE_LOAD + || type == TYPE_LOADN)) + { + if (!MEM_P (src) + && (GET_CODE (src) != ZERO_EXTEND + || !MEM_P (XEXP (src, 0)))) + return false; + + if (GET_MODE_SIZE (GET_MODE (dest)) > 4 + && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW)) + return false; + + rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))), + SET_SRC (pat)); + newpat = gen_load_shadow (SET_DEST (pat)); + pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD); + delay = 4; + } + else if (code >= 0 + && (type == TYPE_MPY2 + || type == TYPE_MPY4)) + { + /* We don't handle floating point multiplies yet. 
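+	 Refusing the split here means hwloop_optimize will be unable to
+	 pipeline a loop that contains such a multiply.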
*/ + if (GET_MODE (dest) == SFmode) + return false; + + rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))), + SET_SRC (pat)); + newpat = gen_mult_shadow (SET_DEST (pat)); + pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT); + delay = type == TYPE_MPY2 ? 1 : 3; + } + else + return false; + + pat = duplicate_cond (pat, insn); + newpat = duplicate_cond (newpat, insn); + i1 = emit_insn_before (pat, insn); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + recog_memoized (insn); + recog_memoized (i1); + record_delay_slot_pair (i1, insn, delay, 0); + return true; +} + +/* Examine if INSN is the result of splitting a load into a real load and a + shadow, and if so, undo the transformation. */ +static void +undo_split_delayed_nonbranch (rtx insn) +{ + int icode = recog_memoized (insn); + enum attr_type type; + rtx prev_pat, insn_pat, prev; + + if (icode < 0) + return; + type = get_attr_type (insn); + if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW) + return; + prev = PREV_INSN (insn); + prev_pat = PATTERN (prev); + insn_pat = PATTERN (insn); + if (GET_CODE (prev_pat) == COND_EXEC) + { + prev_pat = COND_EXEC_CODE (prev_pat); + insn_pat = COND_EXEC_CODE (insn_pat); + } + + gcc_assert (GET_CODE (prev_pat) == UNSPEC + && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD + && type == TYPE_LOAD_SHADOW) + || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT + && type == TYPE_MULT_SHADOW))); + insn_pat = gen_rtx_SET (VOIDmode, SET_DEST (insn_pat), + XVECEXP (prev_pat, 0, 1)); + insn_pat = duplicate_cond (insn_pat, prev); + PATTERN (insn) = insn_pat; + INSN_CODE (insn) = -1; + delete_insn (prev); +} + +/* Split every insn (i.e. jumps and calls) which can have delay slots into + two parts: the first one is scheduled normally and emits the instruction, + while the second one is a shadow insn which shows the side effect taking + place. The second one is placed in the right cycle by the scheduler, but + not emitted as an assembly instruction. */ + +static void +split_delayed_insns (void) +{ + rtx insn; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) || CALL_P (insn)) + split_delayed_branch (insn); + } +} + +/* For every insn that has an entry in the new_conditions vector, give it + the appropriate predicate. */ +static void +conditionalize_after_sched (void) +{ + basic_block bb; + rtx insn; + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + unsigned uid = INSN_UID (insn); + rtx cond; + if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH) + continue; + cond = INSN_INFO_ENTRY (uid).new_cond; + if (cond == NULL_RTX) + continue; + if (dump_file) + fprintf (dump_file, "Conditionalizing insn %d\n", uid); + predicate_insn (insn, cond, true); + } +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx pat, reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + pat = PATTERN (insn); + reg = SET_DEST (XVECEXP (pat, 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +/* Return the number of cycles taken by BB, as computed by scheduling, + including the latencies of all insns with delay slots. IGNORE is + an insn we should ignore in the calculation, usually the final + branch. 
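+   The value returned is the maximum, over all insns in BB, of issue clock
+   plus latency.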
*/ +static int +bb_earliest_end_cycle (basic_block bb, rtx ignore) +{ + int earliest = 0; + rtx insn; + + FOR_BB_INSNS (bb, insn) + { + int cycles, this_clock; + + if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || insn == ignore) + continue; + + this_clock = insn_get_clock (insn); + cycles = get_attr_cycles (insn); + + if (earliest < this_clock + cycles) + earliest = this_clock + cycles; + } + return earliest; +} + +/* Examine the insns in BB and remove all which have a uid greater or + equal to MAX_UID. */ +static void +filter_insns_above (basic_block bb, int max_uid) +{ + rtx insn, next; + bool prev_ti = false; + int prev_cycle = -1; + + FOR_BB_INSNS_SAFE (bb, insn, next) + { + int this_cycle; + if (!NONDEBUG_INSN_P (insn)) + continue; + if (insn == BB_END (bb)) + return; + this_cycle = insn_get_clock (insn); + if (prev_ti && this_cycle == prev_cycle) + { + gcc_assert (GET_MODE (insn) != TImode); + PUT_MODE (insn, TImode); + } + prev_ti = false; + if (INSN_UID (insn) >= max_uid) + { + if (GET_MODE (insn) == TImode) + { + prev_ti = true; + prev_cycle = this_cycle; + } + delete_insn (insn); + } + } +} + +/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + +static void +c6x_asm_emit_except_personality (rtx personality) +{ + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); +} + +/* Use a special assembly directive rather than a regular setion for + unwind table data. */ + +static void +c6x_asm_init_sections (void) +{ + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); +} + +/* A callback for the hw-doloop pass. Called to optimize LOOP in a + machine-specific fashion; returns true if successful and false if + the hwloop_fail function should be called. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + basic_block entry_bb, bb; + rtx seq, insn, prev, entry_after, end_packet; + rtx head_insn, tail_insn, new_insns, last_insn; + int loop_earliest; + int n_execute_packets; + edge entry_edge; + unsigned ix; + int max_uid_before, delayed_splits; + int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages; + rtx *orig_vec; + rtx *copies; + rtx **insn_copies; + + if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2 + || !TARGET_INSNS_64PLUS) + return false; + + if (loop->iter_reg_used || loop->depth > 1) + return false; + if (loop->has_call || loop->has_asm) + return false; + + if (loop->head != loop->tail) + return false; + + gcc_assert (loop->incoming_dest == loop->head); + + entry_edge = NULL; + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) + if (entry_edge->flags & EDGE_FALLTHRU) + break; + if (entry_edge == NULL) + return false; + + reshuffle_units (loop->head); + + in_hwloop = true; + schedule_ebbs_init (); + schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true); + schedule_ebbs_finish (); + in_hwloop = false; + + bb = loop->head; + loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1; + + max_uid_before = get_max_uid (); + + /* Split all multi-cycle operations, such as loads. For normal + scheduling, we only do this for branches, as the generated code + would otherwise not be interrupt-safe. When using sploop, it is + safe and beneficial to split them. If any multi-cycle operations + remain after splitting (because we don't handle them yet), we + cannot pipeline the loop. 
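+     If that happens we undo the splits made so far and give up on this
+     loop.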
*/ + delayed_splits = 0; + FOR_BB_INSNS (bb, insn) + { + if (NONDEBUG_INSN_P (insn)) + { + recog_memoized (insn); + if (split_delayed_nonbranch (insn)) + delayed_splits++; + else if (INSN_CODE (insn) >= 0 + && get_attr_cycles (insn) > 1) + goto undo_splits; + } + } + + /* Count the number of insns as well as the number real insns, and save + the original sequence of insns in case we must restore it later. */ + n_insns = n_real_insns = 0; + FOR_BB_INSNS (bb, insn) + { + n_insns++; + if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end) + n_real_insns++; + } + orig_vec = XNEWVEC (rtx, n_insns); + n_insns = 0; + FOR_BB_INSNS (bb, insn) + orig_vec[n_insns++] = insn; + + /* Count the unit reservations, and compute a minimum II from that + table. */ + count_unit_reqs (unit_reqs, loop->start_label, + PREV_INSN (loop->loop_end)); + merge_unit_reqs (unit_reqs); + + min_ii = res_mii (unit_reqs); + max_ii = loop_earliest < 15 ? loop_earliest : 14; + + /* Make copies of the loop body, up to a maximum number of stages we want + to handle. */ + max_parallel = loop_earliest / min_ii + 1; + + copies = XCNEWVEC (rtx, (max_parallel + 1) * n_real_insns); + insn_copies = XNEWVEC (rtx *, max_parallel + 1); + for (i = 0; i < max_parallel + 1; i++) + insn_copies[i] = copies + i * n_real_insns; + + head_insn = next_nonnote_nondebug_insn (loop->start_label); + tail_insn = prev_real_insn (BB_END (bb)); + + i = 0; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end) + insn_copies[0][i++] = insn; + + sploop_max_uid_iter0 = get_max_uid (); + + /* Generate the copies of the loop body, and save them in the + INSN_COPIES array. */ + start_sequence (); + for (i = 0; i < max_parallel; i++) + { + int j; + rtx this_iter; + + this_iter = duplicate_insn_chain (head_insn, tail_insn); + j = 0; + while (this_iter) + { + rtx prev_stage_insn = insn_copies[i][j]; + gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn)); + + if (INSN_CODE (this_iter) >= 0 + && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW + || get_attr_type (this_iter) == TYPE_MULT_SHADOW)) + { + rtx prev = PREV_INSN (this_iter); + record_delay_slot_pair (prev, this_iter, + get_attr_cycles (prev) - 1, 0); + } + else + record_delay_slot_pair (prev_stage_insn, this_iter, i, 1); + + insn_copies[i + 1][j] = this_iter; + j++; + this_iter = next_nonnote_nondebug_insn (this_iter); + } + } + new_insns = get_insns (); + last_insn = insn_copies[max_parallel][n_real_insns - 1]; + end_sequence (); + emit_insn_before (new_insns, BB_END (bb)); + + /* Try to schedule the loop using varying initiation intervals, + starting with the smallest possible and incrementing it + on failure. */ + for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++) + { + basic_block tmp_bb; + if (dump_file) + fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii); + + df_clear_flags (DF_LR_RUN_DCE); + + schedule_ebbs_init (); + set_modulo_params (sp_ii, max_parallel, n_real_insns, + sploop_max_uid_iter0); + tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true); + schedule_ebbs_finish (); + + if (tmp_bb) + { + if (dump_file) + fprintf (dump_file, "Found schedule with II %d\n", sp_ii); + break; + } + } + + discard_delay_pairs_above (max_uid_before); + + if (sp_ii > max_ii) + goto restore_loop; + + stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1; + + if (stages == 1 && sp_ii > 5) + goto restore_loop; + + /* At this point, we know we've been successful, unless we find later that + there are too many execute packets for the loop buffer to hold. 
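+     (The loop buffer can hold at most 14 execute packets; see the check on
+     n_execute_packets below.)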
*/ + + /* Assign reservations to the instructions in the loop. We must find + the stage that contains the full loop kernel, and transfer the + reservations of the instructions contained in it to the corresponding + instructions from iteration 0, which are the only ones we'll keep. */ + assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn); + PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0; + NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb); + filter_insns_above (bb, sploop_max_uid_iter0); + + for (i = 0; i < n_real_insns; i++) + { + rtx insn = insn_copies[0][i]; + int uid = INSN_UID (insn); + int stage = insn_uid_get_clock (uid) / sp_ii; + + if (stage + 1 < stages) + { + int copy_uid; + stage = stages - stage - 1; + copy_uid = INSN_UID (insn_copies[stage][i]); + INSN_INFO_ENTRY (uid).reservation + = INSN_INFO_ENTRY (copy_uid).reservation; + } + } + if (stages == 1) + stages++; + + /* Compute the number of execute packets the pipelined form of the loop will + require. */ + prev = NULL_RTX; + n_execute_packets = 0; + for (insn = loop->start_label; insn != loop->loop_end; insn = NEXT_INSN (insn)) + { + if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode + && !shadow_p (insn)) + { + n_execute_packets++; + if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn)) + /* We need an extra NOP instruction. */ + n_execute_packets++; + + prev = insn; + } + } + + end_packet = ss.last_scheduled_iter0; + while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode) + end_packet = PREV_INSN (end_packet); + + /* The earliest cycle in which we can emit the SPKERNEL instruction. */ + loop_earliest = (stages - 1) * sp_ii; + if (loop_earliest > insn_get_clock (end_packet)) + { + n_execute_packets++; + end_packet = loop->loop_end; + } + else + loop_earliest = insn_get_clock (end_packet); + + if (n_execute_packets > 14) + goto restore_loop; + + /* Generate the spkernel instruction, and place it at the appropriate + spot. */ + PUT_MODE (end_packet, VOIDmode); + + insn = gen_spkernel (GEN_INT (stages - 1), + const0_rtx, JUMP_LABEL (loop->loop_end)); + insn = emit_jump_insn_before (insn, end_packet); + JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end); + insn_set_clock (insn, loop_earliest); + PUT_MODE (insn, TImode); + INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false; + delete_insn (loop->loop_end); + + /* Place the mvc and sploop instructions before the loop. */ + entry_bb = entry_edge->src; + + start_sequence (); + + insn = emit_insn (gen_mvilc (loop->iter_reg)); + insn = emit_insn (gen_sploop (GEN_INT (sp_ii))); + + seq = get_insns (); + + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU)) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + make_edge (new_bb, loop->head, 0); + } + else + { + entry_after = BB_END (entry_bb); + while (DEBUG_INSN_P (entry_after) + || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) + entry_after = PREV_INSN (entry_after); + emit_insn_after (seq, entry_after); + } + + end_sequence (); + + /* Make sure we don't try to schedule this loop again. 
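+     We do this by setting BB_DISABLE_SCHEDULE on every block of the loop.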
*/ + for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) + bb->flags |= BB_DISABLE_SCHEDULE; + + return true; + + restore_loop: + if (dump_file) + fprintf (dump_file, "Unable to pipeline loop.\n"); + + for (i = 1; i < n_insns; i++) + { + NEXT_INSN (orig_vec[i - 1]) = orig_vec[i]; + PREV_INSN (orig_vec[i]) = orig_vec[i - 1]; + } + PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb)); + NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0]; + NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb)); + PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1]; + BB_HEAD (bb) = orig_vec[0]; + BB_END (bb) = orig_vec[n_insns - 1]; + undo_splits: + free_delay_pairs (); + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) + undo_split_delayed_nonbranch (insn); + return false; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. */ +static void +hwloop_fail (hwloop_info loop) +{ + rtx insn, test, testreg; + + if (dump_file) + fprintf (dump_file, "splitting doloop insn %d\n", + INSN_UID (loop->loop_end)); + insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx); + /* See if we can emit the add at the head of the loop rather than at the + end. */ + if (loop->head == NULL + || loop->iter_reg_used_outside + || loop->iter_reg_used + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg)) + || loop->incoming_dest != loop->head + || EDGE_COUNT (loop->head->preds) != 2) + emit_insn_before (insn, loop->loop_end); + else + { + rtx t = loop->start_label; + while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK) + t = NEXT_INSN (t); + emit_insn_after (insn, t); + } + + testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2)); + if (GET_CODE (testreg) == SCRATCH) + testreg = loop->iter_reg; + else + emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end); + + test = gen_rtx_NE (VOIDmode, testreg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +static struct hw_doloop_hooks c6x_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run the hw-doloop pass to modulo-schedule hardware loops, or split the + doloop_end patterns where such optimizations are impossible. */ +static void +c6x_hwloops (void) +{ + if (optimize) + reorg_loops (true, &c6x_doloop_hooks); +} + +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here + into a sequence that loads the return register and performs the call, + and emit the return label. + If scheduling after reload is requested, it happens here. */ + +static void +c6x_reorg (void) +{ + basic_block bb; + rtx *call_labels; + bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()); + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + df_clear_flags (DF_LR_RUN_DCE); + df_note_add_problem (); + + /* If optimizing, we'll have split before scheduling. */ + if (optimize == 0) + split_all_insns (); + + df_analyze (); + + if (c6x_flag_schedule_insns2) + { + int sz = get_max_uid () * 3 / 2 + 1; + + insn_info.create (sz); + } + + /* Make sure the real-jump insns we create are not deleted. 
When modulo- + scheduling, situations where a reg is only stored in a loop can also + cause dead code when doing the initial unrolling. */ + sched_no_dce = true; + + c6x_hwloops (); + + if (c6x_flag_schedule_insns2) + { + split_delayed_insns (); + timevar_push (TV_SCHED2); + if (do_selsched) + run_selective_scheduling (); + else + schedule_ebbs (); + conditionalize_after_sched (); + timevar_pop (TV_SCHED2); + + free_delay_pairs (); + } + sched_no_dce = false; + + call_labels = XCNEWVEC (rtx, get_max_uid () + 1); + + reorg_split_calls (call_labels); + + if (c6x_flag_schedule_insns2) + { + FOR_EACH_BB_FN (bb, cfun) + if ((bb->flags & BB_DISABLE_SCHEDULE) == 0) + assign_reservations (BB_HEAD (bb), BB_END (bb)); + } + + if (c6x_flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + timevar_pop (TV_VAR_TRACKING); + } + + reorg_emit_nops (call_labels); + + /* Post-process the schedule to move parallel insns into SEQUENCEs. */ + if (c6x_flag_schedule_insns2) + { + free_delay_pairs (); + c6x_gen_bundles (); + } + + df_finish_pass (false); +} + +/* Called when a function has been assembled. It should perform all the + tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific + tasks. + We free the reservation (and other scheduling) information here now that + all insns have been output. */ +void +c6x_function_end (FILE *file, const char *fname) +{ + c6x_output_fn_unwind (file); + + insn_info.release (); + + if (!flag_inhibit_size_directive) + ASM_OUTPUT_MEASURED_SIZE (file, fname); +} + +/* Determine whether X is a shift with code CODE and an integer amount + AMOUNT. */ +static bool +shift_p (rtx x, enum rtx_code code, int amount) +{ + return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) == amount); +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +c6x_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; + + switch (code) + { + case CONST_INT: + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_IsB (x) ? 0 : cost2; + else if (outer_code == AND || outer_code == IOR || outer_code == XOR + || outer_code == MINUS) + *total = satisfies_constraint_Is5 (x) ? 0 : cost2; + else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE + || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) + *total = satisfies_constraint_Iu4 (x) ? 0 : cost2; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = satisfies_constraint_Iu5 (x) ? 0 : cost2; + else + *total = cost2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case CONST_DOUBLE: + *total = COSTS_N_INSNS (2); + return true; + + case TRUNCATE: + /* Recognize a mult_highpart operation. 
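+	 That is, a TRUNCATE of an LSHIFTRT of a widening MULT, where the
+	 shift amount equals the width of the result mode.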
*/ + if ((GET_MODE (x) == HImode || GET_MODE (x) == SImode) + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (GET_MODE (x)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (GET_MODE (x))) + { + rtx mul = XEXP (XEXP (x, 0), 0); + rtx op0 = XEXP (mul, 0); + rtx op1 = XEXP (mul, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + + if ((code0 == code1 + && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND)) + || (GET_MODE (x) == HImode + && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND)) + { + if (GET_MODE (x) == HImode) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (12); + *total += rtx_cost (XEXP (op0, 0), code0, 0, speed); + *total += rtx_cost (XEXP (op1, 0), code1, 0, speed); + return true; + } + } + return false; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15); + else + *total = COSTS_N_INSNS (1); + return false; + + case PLUS: + case MINUS: + *total = COSTS_N_INSNS (1); + op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1); + op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0); + if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD + && INTEGRAL_MODE_P (GET_MODE (x)) + && GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT + && (INTVAL (XEXP (op0, 1)) == 2 + || INTVAL (XEXP (op0, 1)) == 4 + || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8))) + { + *total += rtx_cost (XEXP (op0, 0), ASHIFT, 0, speed); + *total += rtx_cost (op1, (enum rtx_code) code, 1, speed); + return true; + } + return false; + + case MULT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == DFmode) + { + if (TARGET_FP) + *total = COSTS_N_INSNS (speed ? 10 : 1); + else + *total = COSTS_N_INSNS (speed ? 200 : 4); + } + else if (GET_MODE (x) == SFmode) + { + if (TARGET_FP) + *total = COSTS_N_INSNS (speed ? 4 : 1); + else + *total = COSTS_N_INSNS (speed ? 100 : 4); + } + else if (GET_MODE (x) == DImode) + { + if (TARGET_MPY32 + && GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (speed ? 2 : 1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else + /* Maybe improve this laster. */ + *total = COSTS_N_INSNS (20); + } + else if (GET_MODE (x) == SImode) + { + if (((GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND + || shift_p (op0, LSHIFTRT, 16)) + && (GET_CODE (op1) == SIGN_EXTEND + || GET_CODE (op1) == ZERO_EXTEND + || scst5_operand (op1, SImode) + || shift_p (op1, ASHIFTRT, 16) + || shift_p (op1, LSHIFTRT, 16))) + || (shift_p (op0, ASHIFTRT, 16) + && (GET_CODE (op1) == SIGN_EXTEND + || shift_p (op1, ASHIFTRT, 16)))) + { + *total = COSTS_N_INSNS (speed ? 2 : 1); + op0 = XEXP (op0, 0); + if (scst5_operand (op1, SImode)) + op1 = NULL_RTX; + else + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else if (TARGET_MPY32) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (6); + } + else if (GET_MODE (x) == HImode) + *total = COSTS_N_INSNS (speed ? 
2 : 1); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, 0, speed); + if (op1 && GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, 1, speed); + return true; + + case UDIV: + case DIV: + /* This is a bit random; assuming on average there'll be 16 leading + zeros. FIXME: estimate better for constant dividends. */ + *total = COSTS_N_INSNS (6 + 3 * 16); + return false; + + case IF_THEN_ELSE: + /* Recognize the cmp_and/ior patterns. */ + op0 = XEXP (x, 0); + if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE) + && REG_P (XEXP (op0, 0)) + && XEXP (op0, 1) == const0_rtx + && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0))) + { + *total = rtx_cost (XEXP (x, 1), (enum rtx_code) outer_code, + opno, speed); + return false; + } + return false; + + default: + return false; + } +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +c6x_vector_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case V2HImode: + case V4QImode: + case V2SImode: + case V4HImode: + case V8QImode: + return true; + default: + return false; + } +} + +/* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ +static enum machine_mode +c6x_preferred_simd_mode (enum machine_mode mode) +{ + switch (mode) + { + case HImode: + return V2HImode; + case QImode: + return V4QImode; + + default: + return word_mode; + } +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */ + +static bool +c6x_scalar_mode_supported_p (enum machine_mode mode) +{ + if (ALL_FIXED_POINT_MODE_P (mode) + && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD) + return true; + + return default_scalar_mode_supported_p (mode); +} + +/* Output a reference from a function exception table to the type_info + object X. Output these via a special assembly directive. */ + +static bool +c6x_output_ttype (rtx x) +{ + /* Use special relocations for symbol references. */ + if (GET_CODE (x) != CONST_INT) + fputs ("\t.ehtype\t", asm_out_file); + else + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + fputc ('\n', asm_out_file); + + return TRUE; +} + +/* Modify the return address of the current function. */ + +void +c6x_set_return_address (rtx source, rtx scratch) +{ + struct c6x_frame frame; + rtx addr; + HOST_WIDE_INT offset; + + c6x_compute_frame_layout (&frame); + if (! c6x_save_reg (RETURN_ADDR_REGNO)) + emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source); + else + { + + if (frame_pointer_needed) + { + addr = hard_frame_pointer_rtx; + offset = frame.b3_offset; + } + else + { + addr = stack_pointer_rtx; + offset = frame.to_allocate - frame.b3_offset; + } + + /* TODO: Use base+offset loads where possible. */ + if (offset) + { + HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode); + + emit_insn (gen_movsi_high (scratch, GEN_INT (low))); + if (low != offset) + emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset))); + emit_insn (gen_addsi3 (scratch, addr, scratch)); + addr = scratch; + } + + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } +} + +/* We save pairs of registers using a DImode store. Describe the component + registers for DWARF generation code. 
*/ + +static rtx +c6x_dwarf_register_span (rtx rtl) +{ + unsigned regno; + unsigned real_regno; + int nregs; + int i; + rtx p; + + regno = REGNO (rtl); + nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl)); + if (nregs == 1) + return NULL_RTX; + + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs)); + for (i = 0; i < nregs; i++) + { + if (TARGET_BIG_ENDIAN) + real_regno = regno + nregs - (i + 1); + else + real_regno = regno + i; + + XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno); + } + + return p; +} + +/* Codes for all the C6X builtins. */ +enum c6x_builtins +{ + C6X_BUILTIN_SADD, + C6X_BUILTIN_SSUB, + C6X_BUILTIN_ADD2, + C6X_BUILTIN_SUB2, + C6X_BUILTIN_ADD4, + C6X_BUILTIN_SUB4, + C6X_BUILTIN_SADD2, + C6X_BUILTIN_SSUB2, + C6X_BUILTIN_SADDU4, + + C6X_BUILTIN_SMPY, + C6X_BUILTIN_SMPYH, + C6X_BUILTIN_SMPYHL, + C6X_BUILTIN_SMPYLH, + C6X_BUILTIN_MPY2, + C6X_BUILTIN_SMPY2, + + C6X_BUILTIN_CLRR, + C6X_BUILTIN_EXTR, + C6X_BUILTIN_EXTRU, + + C6X_BUILTIN_SSHL, + C6X_BUILTIN_SUBC, + C6X_BUILTIN_ABS, + C6X_BUILTIN_ABS2, + C6X_BUILTIN_AVG2, + C6X_BUILTIN_AVGU4, + + C6X_BUILTIN_MAX +}; + + +static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX]; + +/* Return the C6X builtin for CODE. */ +static tree +c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= C6X_BUILTIN_MAX) + return error_mark_node; + + return c6x_builtin_decls[code]; +} + +#define def_builtin(NAME, TYPE, CODE) \ +do { \ + tree bdecl; \ + bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ + c6x_builtin_decls[CODE] = bdecl; \ +} while (0) + +/* Set up all builtin functions for this target. */ +static void +c6x_init_builtins (void) +{ + tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4); + tree V2HI_type_node = build_vector_type (intHI_type_node, 2); + tree V2SI_type_node = build_vector_type (intSI_type_node, 2); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v4qi_ftype_v4qi_v4qi + = build_function_type_list (V4QI_type_node, V4QI_type_node, + V4QI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2si_ftype_v2hi_v2hi + = build_function_type_list (V2SI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + + def_builtin ("__builtin_c6x_sadd", int_ftype_int_int, + C6X_BUILTIN_SADD); + def_builtin ("__builtin_c6x_ssub", int_ftype_int_int, + C6X_BUILTIN_SSUB); + def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_ADD2); + def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SUB2); + def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_ADD4); + def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_SUB4); + def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi, + C6X_BUILTIN_MPY2); + def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SADD2); + def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SSUB2); + def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_SADDU4); + def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi, + C6X_BUILTIN_SMPY2); + + def_builtin ("__builtin_c6x_smpy", int_ftype_int_int, + C6X_BUILTIN_SMPY); + def_builtin 
("__builtin_c6x_smpyh", int_ftype_int_int, + C6X_BUILTIN_SMPYH); + def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int, + C6X_BUILTIN_SMPYHL); + def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int, + C6X_BUILTIN_SMPYLH); + + def_builtin ("__builtin_c6x_sshl", int_ftype_int_int, + C6X_BUILTIN_SSHL); + def_builtin ("__builtin_c6x_subc", int_ftype_int_int, + C6X_BUILTIN_SUBC); + + def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_AVG2); + def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_AVGU4); + + def_builtin ("__builtin_c6x_clrr", int_ftype_int_int, + C6X_BUILTIN_CLRR); + def_builtin ("__builtin_c6x_extr", int_ftype_int_int, + C6X_BUILTIN_EXTR); + def_builtin ("__builtin_c6x_extru", int_ftype_int_int, + C6X_BUILTIN_EXTRU); + + def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS); + def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2); +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum c6x_builtins code; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD }, + { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB }, + { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 }, + { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 }, + { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 }, + { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 }, + { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 }, + { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 }, + { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 }, + + { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC }, + { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL }, + + { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 }, + { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 }, + + { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY }, + { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH }, + { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH }, + { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL }, + + { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 }, + + { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR }, + { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR }, + { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU } +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS }, + { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of c6x_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + bool match_op) +{ + int offs = match_op ? 
1 : 0; + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1 + offs].mode; + enum machine_mode mode1 = insn_data[icode].operand[2 + offs].mode; + rtx ret = target; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + { + if (tmode == SQmode || tmode == V2SQmode) + { + ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode); + target = gen_lowpart (tmode, ret); + } + else + target = gen_reg_rtx (tmode); + } + + if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode) + && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode)) + { + op0mode = mode0; + op0 = gen_lowpart (mode0, op0); + } + if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode) + && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode)) + { + op1mode = mode1; + op1 = gen_lowpart (mode1, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (match_op) + pat = GEN_FCN (icode) (target, target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1); + + if (! pat) + return 0; + + emit_insn (pat); + + return ret; +} + +/* Subroutine of c6x_expand_builtin to take care of unop insns. */ + +static rtx +c6x_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + size_t i; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return c6x_expand_binop_builtin (d->icode, exp, target, + fcode == C6X_BUILTIN_CLRR); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return c6x_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); +} + +/* Target unwind frame info is generated from dwarf CFI directives, so + always output dwarf2 unwind info. */ + +static enum unwind_info_type +c6x_debug_unwind_info (void) +{ + if (flag_unwind_tables || flag_exceptions) + return UI_DWARF2; + + return default_debug_unwind_info (); +} + +/* Target Structure. */ + +/* Initialize the GCC target structure. */ +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG c6x_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary +#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY +#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \ + c6x_function_arg_round_boundary +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE c6x_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE c6x_libcall_value +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY c6x_return_in_memory +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB c6x_return_in_msb +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES c6x_callee_copies +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION c6x_elf_select_section +#undef TARGET_ASM_UNIQUE_SECTION +#define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags +#undef TARGET_HAVE_SRODATA_SECTION +#define TARGET_HAVE_SRODATA_SECTION true +#undef TARGET_ASM_MERGEABLE_RODATA_PREFIX +#define 
TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const" + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE c6x_option_override +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS c6x_init_libfuncs +#undef TARGET_LIBFUNC_GNU_PREFIX +#define TARGET_LIBFUNC_GNU_PREFIX true + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS c6x_rtx_costs + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT c6x_sched_init +#undef TARGET_SCHED_SET_SCHED_FLAGS +#define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST c6x_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE c6x_issue_rate +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER c6x_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 c6x_sched_reorder2 +#undef TARGET_SCHED_DFA_NEW_CYCLE +#define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle +#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn +#undef TARGET_SCHED_EXPOSED_PIPELINE +#define TARGET_SCHED_EXPOSED_PIPELINE true + +#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT +#define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context +#undef TARGET_SCHED_INIT_SCHED_CONTEXT +#define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context +#undef TARGET_SCHED_SET_SCHED_CONTEXT +#define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context +#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT +#define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context +#undef TARGET_SCHED_FREE_SCHED_CONTEXT +#define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE c6x_can_eliminate + +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START c6x_file_start + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND c6x_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p + +/* C6x unwinding tables use a different format for the typeinfo tables. */ +#undef TARGET_ASM_TTYPE +#define TARGET_ASM_TTYPE c6x_output_ttype + +/* The C6x ABI follows the ARM EABI exception handling rules. 
*/ +#undef TARGET_ARM_EABI_UNWINDER +#define TARGET_ARM_EABI_UNWINDER true + +#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY +#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality + +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections + +#undef TARGET_DEBUG_UNWIND_INFO +#define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS c6x_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN c6x_expand_builtin +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL c6x_builtin_decl + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-c6x.h" diff --git a/gcc-4.9/gcc/config/c6x/c6x.h b/gcc-4.9/gcc/config/c6x/c6x.h new file mode 100644 index 000000000..e0a60a971 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.h @@ -0,0 +1,618 @@ +/* Target Definitions for TI C6X. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_C6X_H +#define GCC_C6X_H + +/* Feature bit definitions that enable specific insns. */ +#define C6X_INSNS_C62X 1 +#define C6X_INSNS_C64X 2 +#define C6X_INSNS_C64XP 4 +#define C6X_INSNS_C67X 8 +#define C6X_INSNS_C67XP 16 +#define C6X_INSNS_C674X 32 +#define C6X_INSNS_ATOMIC 64 +#define C6X_INSNS_ALL_CPU_BITS 127 + +#define C6X_DEFAULT_INSN_MASK \ + (C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP) + +/* A mask of allowed insn types, as defined above. */ +extern unsigned long c6x_insn_mask; + +/* Value of -march= */ +extern c6x_cpu_t c6x_arch; +#define C6X_DEFAULT_ARCH C6X_CPU_C64XP + +/* True if the target has C64x instructions. */ +#define TARGET_INSNS_64 ((c6x_insn_mask & C6X_INSNS_C64X) != 0) +/* True if the target has C64x+ instructions. */ +#define TARGET_INSNS_64PLUS ((c6x_insn_mask & C6X_INSNS_C64XP) != 0) +/* True if the target has C67x instructions. */ +#define TARGET_INSNS_67 ((c6x_insn_mask & C6X_INSNS_C67X) != 0) +/* True if the target has C67x+ instructions. */ +#define TARGET_INSNS_67PLUS ((c6x_insn_mask & C6X_INSNS_C67XP) != 0) + +/* True if the target supports doubleword loads. */ +#define TARGET_LDDW (TARGET_INSNS_64 || TARGET_INSNS_67) +/* True if the target supports doubleword loads. */ +#define TARGET_STDW TARGET_INSNS_64 +/* True if the target supports the MPY32 family of instructions. */ +#define TARGET_MPY32 TARGET_INSNS_64PLUS +/* True if the target has floating point hardware. */ +#define TARGET_FP TARGET_INSNS_67 +/* True if the target has C67x+ floating point extensions. */ +#define TARGET_FP_EXT TARGET_INSNS_67PLUS + +#define TARGET_DEFAULT 0 + +/* Run-time Target. 
*/ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("machine=tic6x"); \ + builtin_assert ("cpu=tic6x"); \ + builtin_define ("__TMS320C6X__"); \ + builtin_define ("_TMS320C6X"); \ + \ + if (TARGET_DSBT) \ + builtin_define ("__DSBT__"); \ + \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("_BIG_ENDIAN"); \ + else \ + builtin_define ("_LITTLE_ENDIAN"); \ + \ + switch (c6x_arch) \ + { \ + case C6X_CPU_C62X: \ + builtin_define ("_TMS320C6200"); \ + break; \ + \ + case C6X_CPU_C64XP: \ + builtin_define ("_TMS320C6400_PLUS"); \ + /* ... fall through ... */ \ + case C6X_CPU_C64X: \ + builtin_define ("_TMS320C6400"); \ + break; \ + \ + case C6X_CPU_C67XP: \ + builtin_define ("_TMS320C6700_PLUS"); \ + /* ... fall through ... */ \ + case C6X_CPU_C67X: \ + builtin_define ("_TMS320C6700"); \ + break; \ + \ + case C6X_CPU_C674X: \ + builtin_define ("_TMS320C6740"); \ + builtin_define ("_TMS320C6700_PLUS"); \ + builtin_define ("_TMS320C6700"); \ + builtin_define ("_TMS320C6400_PLUS"); \ + builtin_define ("_TMS320C6400"); \ + break; \ + } \ + } while (0) + +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" } + +/* Storage Layout. */ + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +#define REG_WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 +#define PARM_BOUNDARY 8 +#define STACK_BOUNDARY 64 +#define FUNCTION_BOUNDARY 32 +#define BIGGEST_ALIGNMENT 64 +#define STRICT_ALIGNMENT 1 + +/* The ABI requires static arrays must be at least 8 byte aligned. + Really only externally visible arrays must be aligned this way, as + only those are directly visible from another compilation unit. But + we don't have that information available here. */ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ + ? BITS_PER_UNIT * 8 : (ALIGN)) + +/* Type Layout. */ + +#define DEFAULT_SIGNED_CHAR 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Registers. */ + +#define FIRST_PSEUDO_REGISTER 67 +#define FIXED_REGISTERS \ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1} +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1} + +/* This lists call-used non-predicate registers first, followed by call-used + registers, followed by predicate registers. We want to avoid allocating + the predicate registers for other uses as much as possible. 
*/ +#define REG_ALLOC_ORDER \ + { \ + REG_A0, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7, REG_A8, REG_A9, \ + REG_A16, REG_A17, REG_A18, REG_A19, REG_A20, REG_A21, REG_A22, REG_A23, \ + REG_A24, REG_A25, REG_A26, REG_A27, REG_A28, REG_A29, REG_A30, REG_A31, \ + REG_B4, REG_B5, REG_B6, REG_B7, REG_B8, REG_B9, REG_B16, \ + REG_B17, REG_B18, REG_B19, REG_B20, REG_B21, REG_B22, REG_B23, REG_B24, \ + REG_B25, REG_B26, REG_B27, REG_B28, REG_B29, REG_B30, REG_B31, \ + REG_A10, REG_A11, REG_A12, REG_A13, REG_A14, REG_A15, \ + REG_B3, REG_B10, REG_B11, REG_B12, REG_B13, REG_B14, REG_B15, \ + REG_A1, REG_A2, REG_B0, REG_B1, REG_B2, REG_ILC \ + } + +#define HARD_REGNO_NREGS(regno, mode) \ + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) \ + / UNITS_PER_WORD) + +#define HARD_REGNO_MODE_OK(reg, mode) (GET_MODE_SIZE (mode) <= UNITS_PER_WORD \ + ? 1 : ((reg) & 1) == 0) + +#define MODES_TIEABLE_P(mode1, mode2) \ + ((mode1) == (mode2) || \ + (GET_MODE_SIZE (mode1) <= UNITS_PER_WORD && \ + GET_MODE_SIZE (mode2) <= UNITS_PER_WORD)) + + +/* Register Classes. */ + +enum reg_class + { + NO_REGS, + PREDICATE_A_REGS, + PREDICATE_B_REGS, + PREDICATE_REGS, + PICREG, + SPREG, + CALL_USED_B_REGS, + NONPREDICATE_A_REGS, + NONPREDICATE_B_REGS, + NONPREDICATE_REGS, + A_REGS, + B_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "NO_REGS", \ + "PREDICATE_A_REGS", \ + "PREDICATE_B_REGS", \ + "PREDICATE_REGS", \ + "PICREG", \ + "SPREG", \ + "CALL_USED_B_REGS", \ + "NONPREDICATE_A_REGS", \ + "NONPREDICATE_B_REGS", \ + "NONPREDICATE_REGS", \ + "A_REGS", \ + "B_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0 }, \ + /* PREDICATE_A_REGS. */ \ + { 0x00000006, 0x00000000, 0 }, \ + /* PREDICATE_B_REGS. */ \ + { 0x00000000, 0x00000007, 0 }, \ + /* PREDICATE_REGS. */ \ + { 0x00000006, 0x00000007, 0 }, \ + /* PICREG. */ \ + { 0x00000000, 0x00004000, 0 }, \ + /* SPREG. */ \ + { 0x00000000, 0x00008000, 0 }, \ + /* CALL_USED_B_REGS. */ \ + { 0x00000000, 0xFFFF03FF, 0 }, \ + /* NONPREDICATE_A_REGS. */ \ + { 0xFFFFFFF9, 0x00000000, 0 }, \ + /* NONPREDICATE_B_REGS. */ \ + { 0x00000000, 0xFFFFFFF8, 0 }, \ + /* NONPREDICATE_REGS. */ \ + { 0xFFFFFFF9, 0xFFFFFFF8, 0 }, \ + /* A_REGS. */ \ + { 0xFFFFFFFF, 0x00000000, 3 }, \ + /* B_REGS. */ \ + { 0x00000000, 0xFFFFFFFF, 3 }, \ + /* GENERAL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 3 }, \ + /* ALL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 7 }, \ +} + +#define A_REGNO_P(N) ((N) <= REG_A31) +#define B_REGNO_P(N) ((N) >= REG_B0 && (N) <= REG_B31) + +#define A_REG_P(X) (REG_P (X) && A_REGNO_P (REGNO (X))) +#define CROSS_OPERANDS(X0,X1) \ + (A_REG_P (X0) == A_REG_P (X1) ? CROSS_N : CROSS_Y) + +#define REGNO_REG_CLASS(reg) \ + ((reg) >= REG_A1 && (reg) <= REG_A2 ? PREDICATE_A_REGS \ + : (reg) == REG_A0 && TARGET_INSNS_64 ? PREDICATE_A_REGS \ + : (reg) >= REG_B0 && (reg) <= REG_B2 ? PREDICATE_B_REGS \ + : A_REGNO_P (reg) ? NONPREDICATE_A_REGS \ + : call_used_regs[reg] ? 
CALL_USED_B_REGS : B_REGS) + +#define BASE_REG_CLASS ALL_REGS +#define INDEX_REG_CLASS ALL_REGS + +#define REGNO_OK_FOR_BASE_STRICT_P(X) \ + ((X) < FIRST_PSEUDO_REGISTER \ + || (reg_renumber[X] >= 0 && reg_renumber[X] < FIRST_PSEUDO_REGISTER)) +#define REGNO_OK_FOR_BASE_NONSTRICT_P(X) 1 + +#define REGNO_OK_FOR_INDEX_STRICT_P(X) \ + ((X) < FIRST_PSEUDO_REGISTER \ + || (reg_renumber[X] >= 0 && reg_renumber[X] < FIRST_PSEUDO_REGISTER)) +#define REGNO_OK_FOR_INDEX_NONSTRICT_P(X) 1 + +#ifdef REG_OK_STRICT +#define REGNO_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_STRICT_P (X) +#define REGNO_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_STRICT_P (X) +#else +#define REGNO_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_NONSTRICT_P (X) +#define REGNO_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_NONSTRICT_P (X) +#endif + +#define CLASS_MAX_NREGS(class, mode) \ + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define REGNO_OK_FOR_INDIRECT_JUMP_P(REGNO, MODE) B_REGNO_P (REGNO) + +/* Stack and Calling. */ + +/* SP points to 4 bytes below the first word of the frame. */ +#define STACK_POINTER_OFFSET 4 +/* Likewise for AP (which is the incoming stack pointer). */ +#define FIRST_PARM_OFFSET(fundecl) 4 +#define STARTING_FRAME_OFFSET 0 +#define FRAME_GROWS_DOWNWARD 1 +#define STACK_GROWS_DOWNWARD + +#define STACK_POINTER_REGNUM REG_B15 +#define HARD_FRAME_POINTER_REGNUM REG_A15 +/* These two always get eliminated in favour of the stack pointer + or the hard frame pointer. */ +#define FRAME_POINTER_REGNUM REG_FRAME +#define ARG_POINTER_REGNUM REG_ARGP + +#define PIC_OFFSET_TABLE_REGNUM REG_B14 + +/* We keep the stack pointer constant rather than using push/pop + instructions. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Before the prologue, the return address is in the B3 register. */ +#define RETURN_ADDR_REGNO REG_B3 +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNO) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (RETURN_ADDR_REGNO) + +#define RETURN_ADDR_RTX(COUNT, FRAME) c6x_return_addr_rtx (COUNT) + +#define INCOMING_FRAME_SP_OFFSET 0 +#define ARG_POINTER_CFA_OFFSET(fundecl) 0 + +#define STATIC_CHAIN_REGNUM REG_A2 + +struct c6x_args { + /* Number of arguments to pass in registers. */ + int nregs; + /* Number of arguments passed in registers so far. */ + int count; +}; + +#define CUMULATIVE_ARGS struct c6x_args + +#define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ + c6x_init_cumulative_args (&cum, fntype, libname, n_named_args) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (c6x_block_reg_pad_upward (MODE, TYPE, FIRST) ? upward : downward) + +#define FUNCTION_ARG_REGNO_P(r) \ + (((r) >= REG_A4 && (r) <= REG_A13) || ((r) >= REG_B4 && (r) <= REG_B13)) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define FUNCTION_PROFILER(file, labelno) \ + fatal_error ("profiling is not yet implemented for this architecture") + + +/* Trampolines. */ +#define TRAMPOLINE_SIZE 32 +#define TRAMPOLINE_ALIGNMENT 256 + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = c6x_initial_elimination_offset ((FROM), (TO))) + +/* Addressing Modes. 
*/ + +#define CONSTANT_ADDRESS_P(x) (CONSTANT_P(x) && GET_CODE(x) != CONST_DOUBLE) +#define MAX_REGS_PER_ADDRESS 2 + +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_INCREMENT 1 +#define HAVE_POST_INCREMENT 1 + +/* Register forms are available, but due to scaling we currently don't + support them. */ +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 + +#define LEGITIMATE_PIC_OPERAND_P(X) \ + (!symbolic_operand (X, SImode)) + +struct GTY(()) machine_function +{ + /* True if we expanded a sibling call. */ + int contains_sibcall; +}; + +/* Costs. */ +#define NO_FUNCTION_CSE 1 + +#define SLOW_BYTE_ACCESS 0 + +#define BRANCH_COST(speed_p, predictable_p) 6 + + +/* Model costs for the vectorizer. */ + +/* Cost of conditional branch. */ +#ifndef TARG_COND_BRANCH_COST +#define TARG_COND_BRANCH_COST 6 +#endif + +/* Cost of any scalar operation, excluding load and store. */ +#ifndef TARG_SCALAR_STMT_COST +#define TARG_SCALAR_STMT_COST 1 +#endif + +/* Cost of scalar load. */ +#undef TARG_SCALAR_LOAD_COST +#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */ + +/* Cost of scalar store. */ +#undef TARG_SCALAR_STORE_COST +#define TARG_SCALAR_STORE_COST 10 + +/* Cost of any vector operation, excluding load, store, + or vector to scalar operation. */ +#undef TARG_VEC_STMT_COST +#define TARG_VEC_STMT_COST 1 + +/* Cost of vector to scalar operation. */ +#undef TARG_VEC_TO_SCALAR_COST +#define TARG_VEC_TO_SCALAR_COST 1 + +/* Cost of scalar to vector operation. */ +#undef TARG_SCALAR_TO_VEC_COST +#define TARG_SCALAR_TO_VEC_COST 1 + +/* Cost of aligned vector load. */ +#undef TARG_VEC_LOAD_COST +#define TARG_VEC_LOAD_COST 1 + +/* Cost of misaligned vector load. */ +#undef TARG_VEC_UNALIGNED_LOAD_COST +#define TARG_VEC_UNALIGNED_LOAD_COST 2 + +/* Cost of vector store. */ +#undef TARG_VEC_STORE_COST +#define TARG_VEC_STORE_COST 1 + +/* Cost of vector permutation. */ +#ifndef TARG_VEC_PERMUTE_COST +#define TARG_VEC_PERMUTE_COST 1 +#endif + +/* ttype entries (the only interesting data references used) are + sb-relative got-indirect (aka .ehtype). */ +#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ + (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \ + : DW_EH_PE_absptr) + +/* This should be the same as the definition in elfos.h, plus the call + to output special unwinding directives. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + c6x_output_file_unwind (FILE); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* This should be the same as the definition in elfos.h, plus the call + to output special unwinding directives. */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \ + c6x_function_end (STREAM, NAME) + +/* Arbitrarily choose A4/A5. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N) + 4 : INVALID_REGNUM) + +/* The register that holds the return address in exception handlers. */ +#define C6X_EH_STACKADJ_REGNUM 3 +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, C6X_EH_STACKADJ_REGNUM) + + +/* Assembler Format. 
*/ + +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#undef ASM_APP_ON +#define ASM_APP_ON "\t; #APP \n" +#undef ASM_APP_OFF +#define ASM_APP_OFF "\t; #NO_APP \n" + +#define ASM_OUTPUT_COMMON(stream, name, size, rounded) +#define ASM_OUTPUT_LOCAL(stream, name, size, rounded) + +#define GLOBAL_ASM_OP "\t.global\t" + +#define REGISTER_NAMES \ + { \ + "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", \ + "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15", \ + "A16", "A17", "A18", "A19", "A20", "A21", "A22", "A23", \ + "A24", "A25", "A26", "A27", "A28", "A29", "A30", "A31", \ + "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", \ + "B8", "B9", "B10", "B11", "B12", "B13", "B14", "B15", \ + "B16", "B17", "B18", "B19", "B20", "B21", "B22", "B23", \ + "B24", "B25", "B26", "B27", "B28", "B29", "B30", "B31", \ + "FP", "ARGP", "ILC" } + +#define DBX_REGISTER_NUMBER(N) (dbx_register_map[(N)]) + +extern unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER]; + +#define FINAL_PRESCAN_INSN c6x_final_prescan_insn + +#define TEXT_SECTION_ASM_OP ".text;" +#define DATA_SECTION_ASM_OP ".data;" + +#define ASM_OUTPUT_ALIGN(stream, power) \ + do \ + { \ + if (power) \ + fprintf ((stream), "\t.align\t%d\n", power); \ + } \ + while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { char __buf[256]; \ + fprintf (FILE, "\t.long\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputc ('\n', FILE); \ + } while (0) + +/* Determine whether to place EXP (an expression or a decl) should be + placed into one of the small data sections. */ +#define PLACE_IN_SDATA_P(EXP) \ + (c6x_sdata_mode == C6X_SDATA_NONE ? false \ + : c6x_sdata_mode == C6X_SDATA_ALL ? true \ + : !AGGREGATE_TYPE_P (TREE_TYPE (EXP))) + +#define SCOMMON_ASM_OP "\t.scomm\t" + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if (DECL != NULL && PLACE_IN_SDATA_P (DECL)) \ + fprintf ((FILE), "%s", SCOMMON_ASM_OP); \ + else \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u,%u\n", (int)(SIZE), (ALIGN) / BITS_PER_UNIT);\ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if (PLACE_IN_SDATA_P (DECL)) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \ +} while (0) + +#define CASE_VECTOR_PC_RELATIVE flag_pic +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do { char buf[100]; \ + fputs ("\t.long ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", VALUE); \ + assemble_name (FILE, buf); \ + putc ('-', FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", REL); \ + assemble_name (FILE, buf); \ + putc ('\n', FILE); \ + } while (0) + +/* Misc. 
*/ + +#define CASE_VECTOR_MODE SImode +#define MOVE_MAX 4 +#define MOVE_RATIO(SPEED) 4 +#define TRULY_NOOP_TRUNCATION(outprec, inprec) 1 +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define Pmode SImode +#define FUNCTION_MODE QImode + +#define CPU_UNITS_QUERY 1 + +extern int c6x_initial_flag_pic; + +#endif /* GCC_C6X_H */ diff --git a/gcc-4.9/gcc/config/c6x/c6x.md b/gcc-4.9/gcc/config/c6x/c6x.md new file mode 100644 index 000000000..53032b1f0 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.md @@ -0,0 +1,3136 @@ +;; Machine description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Andrew Jenner +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Register names + +(define_constants + [(REG_A0 0) + (REG_A1 1) + (REG_A2 2) + (REG_A3 3) + (REG_A4 4) + (REG_A5 5) + (REG_A6 6) + (REG_A7 7) + (REG_A8 8) + (REG_A9 9) + (REG_A10 10) + (REG_A11 11) + (REG_A12 12) + (REG_A13 13) + (REG_A14 14) + (REG_A15 15) + (REG_A16 16) + (REG_A17 17) + (REG_A18 18) + (REG_A19 19) + (REG_A20 20) + (REG_A21 21) + (REG_A22 22) + (REG_A23 23) + (REG_A24 24) + (REG_A25 25) + (REG_A26 26) + (REG_A27 27) + (REG_A28 28) + (REG_A29 29) + (REG_A30 30) + (REG_A31 31) + (REG_B0 32) + (REG_B1 33) + (REG_B2 34) + (REG_B3 35) + (REG_B4 36) + (REG_B5 37) + (REG_B6 38) + (REG_B7 39) + (REG_B8 40) + (REG_B9 41) + (REG_B10 42) + (REG_B11 43) + (REG_B12 44) + (REG_B13 45) + (REG_B14 46) + (REG_SP 47) + (REG_B15 47) + (REG_B16 48) + (REG_B17 49) + (REG_B18 50) + (REG_B19 51) + (REG_B20 52) + (REG_B21 53) + (REG_B22 54) + (REG_B23 55) + (REG_B24 56) + (REG_B25 57) + (REG_B26 58) + (REG_B27 59) + (REG_B28 60) + (REG_B29 61) + (REG_B30 62) + (REG_B31 63) + (REG_FRAME 64) + (REG_ARGP 65) + (REG_ILC 66)]) + +(define_c_enum "unspec" [ + UNSPEC_NOP + UNSPEC_RCP + UNSPEC_MISALIGNED_ACCESS + UNSPEC_ADDKPC + UNSPEC_SETUP_DSBT + UNSPEC_LOAD_GOT + UNSPEC_LOAD_SDATA + UNSPEC_BITREV + UNSPEC_GOTOFF + UNSPEC_MVILC + UNSPEC_REAL_JUMP + UNSPEC_REAL_LOAD + UNSPEC_REAL_MULT + UNSPEC_JUMP_SHADOW + UNSPEC_LOAD_SHADOW + UNSPEC_MULT_SHADOW + UNSPEC_EPILOGUE_BARRIER + UNSPEC_ATOMIC + UNSPEC_CLR + UNSPEC_EXT + UNSPEC_EXTU + UNSPEC_SUBC + UNSPEC_AVG +]) + +(define_c_enum "unspecv" [ + UNSPECV_BLOCKAGE + UNSPECV_SPLOOP + UNSPECV_SPKERNEL + UNSPECV_EH_RETURN + UNSPECV_CAS +]) + +;; ------------------------------------------------------------------------- +;; Instruction attributes +;; ------------------------------------------------------------------------- + +(define_attr "cpu" + "c62x,c64x,c64xp,c67x,c67xp,c674x" + (const (symbol_ref "(enum attr_cpu)c6x_arch"))) + +;; Define a type for each insn which is used in the scheduling description. +;; These correspond to the types defined in chapter 4 of the C674x manual. 
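For orientation, the user-visible surface of this back end is the set of predefined macros installed by TARGET_CPU_CPP_BUILTINS in c6x.h and the __builtin_c6x_* intrinsics registered by c6x_init_builtins in c6x.c earlier in this patch. The following is a minimal usage sketch and is not part of the imported sources: only the macro name, the builtin names and their signatures come from the patch; the vector typedef and the wrapper functions are illustrative.

/* Sketch only; assumes a compiler built with this C6X back end.  */
#ifndef __TMS320C6X__
#error "this example targets the TI C6X port"
#endif

/* V2HImode: a vector of two 16-bit integers (see c6x_init_builtins).  */
typedef short v2hi __attribute__ ((vector_size (4)));

int saturating_add (int a, int b)
{
  /* int __builtin_c6x_sadd (int, int); expands via saddsi3.  */
  return __builtin_c6x_sadd (a, b);
}

v2hi add_pairs (v2hi a, v2hi b)
{
  /* v2hi __builtin_c6x_add2 (v2hi, v2hi); expands via addv2hi3.  */
  return __builtin_c6x_add2 (a, b);
}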
+(define_attr "type" + "unknown,single,mpy2,store,storen,mpy4,load,loadn,branch,call,callp,dp2,fp4, + intdp,cmpdp,adddp,mpy,mpyi,mpyid,mpydp,mpyspdp,mpysp2dp,spkernel,sploop, + mvilc,blockage,shadow,load_shadow,mult_shadow,atomic" + (const_string "single")) + +;; The register file used by an instruction's destination register. +;; The function destreg_file computes this; instructions can override the +;; attribute if they aren't a single_set. +(define_attr "dest_regfile" + "unknown,any,a,b" + (cond [(eq_attr "type" "single,load,mpy2,mpy4,dp2,fp4,intdp,cmpdp,adddp,mpy,mpyi,mpyid,mpydp,mpyspdp,mpysp2dp") + (cond [(match_operand 0 "a_register" "") (const_string "a") + (match_operand 0 "b_register" "") (const_string "b")] + (const_string "unknown")) + (eq_attr "type" "store") + (cond [(match_operand 1 "a_register" "") (const_string "a") + (match_operand 1 "b_register" "") (const_string "b")] + (const_string "unknown"))] + (const_string "unknown"))) + +(define_attr "addr_regfile" + "unknown,a,b" + (const_string "unknown")) + +(define_attr "cross" + "n,y" + (const_string "n")) + +;; This describes the relationship between operands and register files. +;; For example, "sxs" means that operands 0 and 2 determine the side of +;; the machine, and operand 1 can optionally use the cross path. "dt" and +;; "td" are used to describe loads and stores. +;; Used for register renaming in loops for improving modulo scheduling. +(define_attr "op_pattern" + "unknown,dt,td,sx,sxs,ssx" + (cond [(eq_attr "type" "load") (const_string "td") + (eq_attr "type" "store") (const_string "dt")] + (const_string "unknown"))) + +(define_attr "has_shadow" + "n,y" + (const_string "n")) + +;; The number of cycles the instruction takes to finish. Any cycles above +;; the first are delay slots. +(define_attr "cycles" "" + (cond [(eq_attr "type" "branch,call") (const_int 6) + (eq_attr "type" "load,loadn") (const_int 5) + (eq_attr "type" "dp2") (const_int 2) + (eq_attr "type" "mpy2") (const_int 2) + (eq_attr "type" "mpy4") (const_int 4) + (eq_attr "type" "fp4") (const_int 4) + (eq_attr "type" "mvilc") (const_int 4) + (eq_attr "type" "cmpdp") (const_int 2) + (eq_attr "type" "intdp") (const_int 5) + (eq_attr "type" "adddp") (const_int 7) + (eq_attr "type" "mpydp") (const_int 10) + (eq_attr "type" "mpyi") (const_int 9) + (eq_attr "type" "mpyid") (const_int 10) + (eq_attr "type" "mpyspdp") (const_int 7) + (eq_attr "type" "mpysp2dp") (const_int 5)] + (const_int 1))) + +;; The number of cycles during which the instruction reserves functional +;; units. +(define_attr "reserve_cycles" "" + (cond [(eq_attr "type" "cmpdp") (const_int 2) + (eq_attr "type" "adddp") (const_int 2) + (eq_attr "type" "mpydp") (const_int 4) + (eq_attr "type" "mpyi") (const_int 4) + (eq_attr "type" "mpyid") (const_int 4) + (eq_attr "type" "mpyspdp") (const_int 2)] + (const_int 1))) + +(define_attr "predicable" "no,yes" + (const_string "yes")) + +(define_attr "enabled" "no,yes" + (const_string "yes")) + +;; Specify which units can be used by a given instruction. Normally, +;; dest_regfile is used to select between the two halves of the machine. +;; D_ADDR is for load/store instructions; they use the D unit and use +;; addr_regfile to choose between D1 and D2. 
+ +(define_attr "units62" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (const_string "unknown")) + +(define_attr "units64" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (const_string "unknown")) + +(define_attr "units64p" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units64")) + +(define_attr "units67" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units62")) + +(define_attr "units67p" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units67")) + +(define_attr "units674" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units64")) + +(define_attr "units" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (cond [(eq_attr "cpu" "c62x") + (attr "units62") + (eq_attr "cpu" "c67x") + (attr "units67") + (eq_attr "cpu" "c67xp") + (attr "units67p") + (eq_attr "cpu" "c64x") + (attr "units64") + (eq_attr "cpu" "c64xp") + (attr "units64p") + (eq_attr "cpu" "c674x") + (attr "units674") + ] + (const_string "unknown"))) + +(define_automaton "c6x_1,c6x_2,c6x_m1,c6x_m2,c6x_t1,c6x_t2,c6x_branch") +(automata_option "no-comb-vect") +(automata_option "ndfa") +(automata_option "collapse-ndfa") + +(define_query_cpu_unit "d1,l1,s1" "c6x_1") +(define_cpu_unit "x1" "c6x_1") +(define_cpu_unit "l1w,s1w" "c6x_1") +(define_query_cpu_unit "m1" "c6x_m1") +(define_cpu_unit "m1w" "c6x_m1") +(define_cpu_unit "t1" "c6x_t1") +(define_query_cpu_unit "d2,l2,s2" "c6x_2") +(define_cpu_unit "x2" "c6x_2") +(define_cpu_unit "l2w,s2w" "c6x_2") +(define_query_cpu_unit "m2" "c6x_m2") +(define_cpu_unit "m2w" "c6x_m2") +(define_cpu_unit "t2" "c6x_t2") +;; A special set of units used to identify specific reservations, rather than +;; just units. +(define_query_cpu_unit "fps1,fpl1,adddps1,adddpl1" "c6x_1") +(define_query_cpu_unit "fps2,fpl2,adddps2,adddpl2" "c6x_2") + +;; There can be up to two branches in one cycle (on the .s1 and .s2 +;; units), but some instructions must not be scheduled in parallel +;; with a branch. We model this by reserving either br0 or br1 for a +;; normal branch, and both of them for an insn such as callp. +;; Another constraint is that two branches may only execute in parallel +;; if one uses an offset, and the other a register. We can distinguish +;; these by the dest_regfile attribute; it is "any" iff the branch uses +;; an offset. br0 is reserved for these, while br1 is reserved for +;; branches using a register. +(define_cpu_unit "br0,br1" "c6x_branch") + +(include "c6x-sched.md") + +;; Some reservations which aren't generated from c6x-sched.md.in + +(define_insn_reservation "branch_s1any" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any")))) + "s1+s1w+br0") + +;; For calls, we also reserve the units needed in the following cycles +;; to load the return address. There are two options; using addkpc or +;; mvkh/mvkl. The code in c6x_reorg knows whether to use one of these +;; or whether to use callp. The actual insns are emitted only after +;; the final scheduling pass is complete. +;; We always reserve S2 for PC-relative call insns, since that allows +;; us to turn them into callp insns later on. 
+(define_insn_reservation "call_addkpc_s1any" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any"))))) + "s2+s2w+br0,s2+s2w+br0+br1") + +(define_insn_reservation "call_mvk_s1any" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any"))))) + "s2+s2w+br0,s2+s2w,s2+s2w") + +(define_reservation "all" "s1+s2+d1+d2+l1+l2+m1+m2") + +(define_insn_reservation "callp_s1" 1 + (and (eq_attr "type" "callp") (eq_attr "dest_regfile" "a")) + "s1+s1w,all*5") + +(define_insn_reservation "callp_s2" 1 + (and (eq_attr "type" "callp") (eq_attr "dest_regfile" "b")) + "s2+s2w,all*5") + +;; Constraints + +(include "constraints.md") + +;; Predicates + +(include "predicates.md") + +;; General predication pattern. + +(define_cond_exec + [(match_operator 0 "eqne_operator" + [(match_operand 1 "predicate_register" "AB") + (const_int 0)])] + "" + "") + +;; ------------------------------------------------------------------------- +;; NOP instruction +;; ------------------------------------------------------------------------- + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +(define_insn "nop_count" + [(unspec [(match_operand 0 "const_int_operand" "n")] UNSPEC_NOP)] + "" + "%|%.\\tnop\\t%0") + +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +(define_mode_iterator QIHIM [QI HI]) +(define_mode_iterator SIDIM [SI DI]) +(define_mode_iterator SIDIVM [SI DI V2HI V4QI]) +(define_mode_iterator VEC4M [V2HI V4QI]) +(define_mode_iterator VEC8M [V2SI V4HI V8QI]) +(define_mode_iterator SISFVM [SI SF V2HI V4QI]) +(define_mode_iterator DIDFM [DI DF]) +(define_mode_iterator DIDFVM [DI DF V2SI V4HI V8QI]) +(define_mode_iterator SFDFM [SF DF]) +(define_mode_iterator M32 [QI HI SI SF V2HI V4QI]) + +;; The C6X LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. +(define_insn "movsi_lo_sum" + [(set (match_operand:SI 0 "register_operand" "=ab") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_or_symbolic_operand" "i")))] + "reload_completed" + "%|%.\\tmvkh\\t%$\\t%2, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "register_operand" "=ab") + (high:SI (match_operand:SI 1 "const_int_or_symbolic_operand" "i")))] + "reload_completed" + "%|%.\\tmvkl\\t%$\\t%1, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_gotoff_lo_sum" + [(set (match_operand:SI 0 "register_operand" "=ab") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:SI 2 "symbolic_operand" "S2")] + UNSPEC_GOTOFF)))] + "flag_pic == 2" + "%|%.\\tmvkh\\t%$\\t$dpr_got%2, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_gotoff_high" + [(set (match_operand:SI 0 "register_operand" "=ab") + (high:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "S2")] + UNSPEC_GOTOFF)))] + "flag_pic == 2" + "%|%.\\tmvkl\\t%$\\t$dpr_got%1, %0" + [(set_attr "units" "s")]) + +;; Normally we'd represent this as a normal load insn, but we can't currently +;; represent the addressing mode. 
+(define_insn "load_got_gotoff" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "Z,Z") + (match_operand:SI 2 "register_operand" "b,b")] + UNSPEC_GOTOFF))] + "flag_pic == 2" + "%|%.\\tldw\\t%$\\t*+%1[%2], %0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "op_pattern" "unknown") + (set_attr "dest_regfile" "a,b") + (set_attr "addr_regfile" "b")]) + +(define_insn "*movstricthi_high" + [(set (match_operand:SI 0 "register_operand" "+ab") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (ashift:SI (match_operand:SI 1 "const_int_operand" "IuB") + (const_int 16))))] + "reload_completed" + "%|%.\\tmvklh\\t%$\\t%1, %0" + [(set_attr "units" "s")]) + +;; Break up SImode loads of immediate operands. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "reload_completed + && !satisfies_constraint_IsB (operands[1])" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (ashift:SI (match_dup 3) (const_int 16))))] +{ + HOST_WIDE_INT val = INTVAL (operands[1]); + operands[2] = GEN_INT (trunc_int_for_mode (val, HImode)); + operands[3] = GEN_INT ((val >> 16) & 65535); +}) + +(define_split + [(set (match_operand:VEC4M 0 "register_operand" "") + (match_operand:VEC4M 1 "const_vector_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3))] +{ + unsigned HOST_WIDE_INT mask, val; + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int i; + + val = 0; + mask = GET_MODE_MASK (inner_mode); + if (TARGET_BIG_ENDIAN) + { + for (i = 0; i < GET_MODE_NUNITS (mode); i++) + { + val <<= GET_MODE_BITSIZE (inner_mode); + val |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + else + { + i = GET_MODE_NUNITS (mode); + while (i-- > 0) + { + val <<= GET_MODE_BITSIZE (inner_mode); + val |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + operands[2] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (val, SImode)); +}) + +(define_split + [(set (match_operand:VEC8M 0 "register_operand" "") + (match_operand:VEC8M 1 "const_vector_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + unsigned HOST_WIDE_INT mask; + unsigned HOST_WIDE_INT val[2]; + rtx lo_half, hi_half; + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int i, j; + + split_di (operands, 1, &lo_half, &hi_half); + + val[0] = val[1] = 0; + mask = GET_MODE_MASK (inner_mode); + if (TARGET_BIG_ENDIAN) + { + for (i = 0, j = 1; i < GET_MODE_NUNITS (mode); i++) + { + if (i * 2 == GET_MODE_NUNITS (mode)) + j--; + val[j] <<= GET_MODE_BITSIZE (inner_mode); + val[j] |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + else + { + i = GET_MODE_NUNITS (mode); + j = 1; + while (i-- > 0) + { + val[j] <<= GET_MODE_BITSIZE (inner_mode); + val[j] |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + if (i * 2 == GET_MODE_NUNITS (mode)) + j--; + } + } + operands[2] = lo_half; + operands[3] = GEN_INT (trunc_int_for_mode (val[0], SImode)); + operands[4] = hi_half; + operands[5] = GEN_INT (trunc_int_for_mode (val[1], SImode)); +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 2) (ior:SI (and:SI (match_dup 2) (const_int 65535)) + (ashift:SI (match_dup 4) (const_int 16))))] +{ + long 
values; + REAL_VALUE_TYPE value; + + gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + + operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (values, HImode)); + if (values >= -32768 && values < 32768) + { + emit_move_insn (operands[2], operands[3]); + DONE; + } + operands[4] = GEN_INT ((values >> 16) & 65535); +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_operand" ""))] + "reload_completed + && (!TARGET_INSNS_64PLUS + || !sdata_symbolic_operand (operands[1], SImode))" + [(set (match_dup 0) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))] + "") + +;; Normally, we represent the load of an sdata address as a normal +;; move of a SYMBOL_REF. In DSBT mode, B14 is not constant, so we +;; should show the dependency. +(define_insn "load_sdata_pic" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (plus:SI (match_operand:SI 1 "pic_register_operand" "Z,Z") + (unspec:SI [(match_operand:SI 2 "sdata_symbolic_operand" "S0,S0")] + UNSPEC_LOAD_SDATA)))] + "flag_pic" + "@ + %|%.\\tadda%D2\\t%$\\t%1, %2, %0 + %|%.\\tadda%D2\\t%$\\t%1, %2, %0" + [(set_attr "units" "d") + (set_attr "cross" "y,n") + (set_attr "op_pattern" "unknown") + (set_attr "predicable" "no")]) + +;; Move instruction patterns + +(define_mode_attr LDST_SUFFIX [(QI "b") (HI "h") + (SI "w") (SF "w") (V2HI "w") (V4QI "w") + (DI "dw") (V2SI "dw") (V4HI "dw") (V8QI "dw")]) + +(define_insn "mov_insn" + [(set (match_operand:QIHIM 0 "nonimmediate_operand" + "=a,b, a, b, ab, ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:QIHIM 1 "general_operand" + "a,b,?b,?a,Is5,IsB,Q, R, R, Q, a,?a, b,?b"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0" + [(set_attr "type" "*,*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "units62" "dls,dls,ls,ls,s,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "units64" "dls,dls,ls,ls,dl,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "op_pattern" "sx,sx,sx,sx,*,*,*,*,*,*,*,*,*,*") + (set_attr "addr_regfile" "*,*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "cross" "n,n,y,y,n,n,n,y,n,y,n,y,n,y")]) + +(define_insn "mov_insn" + [(set (match_operand:SISFVM 0 "nonimmediate_operand" + "=a,b, a, b, ab, ab,a,b,ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:SISFVM 1 "general_operand" + "a,b,?b,?a,Is5,IsB,S0,S0,Si,Q, R, R, Q, a,?a, b,?b"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG + || (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))))" + "@ + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tadda%D1\\t%$\\tB14, %1, %0 + %|%.\\tadda%D1\\t%$\\tB14, %1, %0 + # + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0 
+ %|%.\\tstw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0" + [(set_attr "type" "*,*,*,*,*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "units62" "dls,dls,ls,ls,s,s,d,d,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "units64" "dls,dls,ls,ls,dl,s,d,d,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "op_pattern" "sx,sx,sx,sx,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "addr_regfile" "*,*,*,*,*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "cross" "n,n,y,y,n,n,y,n,*,n,y,n,y,n,y,n,y") + (set_attr "predicable" "yes,yes,yes,yes,yes,yes,no,no,yes,yes,yes,yes,yes,yes,yes,yes,yes")]) + +(define_insn "*mov_insn" + [(set (match_operand:DIDFVM 0 "nonimmediate_operand" + "=a,b, a, b,ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:DIDFVM 1 "general_operand" + "a,b,?b,?a,iF,Q, R, R, Q, a,?a, b,?b"))] + "(!MEM_P (operands[0]) || REG_P (operands[1]) + || (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))))" +{ + if (MEM_P (operands[1]) && TARGET_LDDW) + return "%|%.\\tlddw\\t%$\\t%1, %0"; + if (MEM_P (operands[0]) && TARGET_STDW) + return "%|%.\\tstdw\\t%$\\t%1, %0"; + if (TARGET_INSNS_64PLUS && REG_P (operands[0]) && REG_P (operands[1]) + && A_REGNO_P (REGNO (operands[0])) == A_REGNO_P (REGNO (operands[1]))) + return "%|%.\\tdmv\\t%$\\t%P1, %p1, %0"; + return "#"; +} + [(set_attr "units" "s,s,*,*,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "addr_regfile" "*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "type" "*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "cross" "n,n,y,y,*,n,y,n,y,n,y,n,y")]) + +(define_split + [(set (match_operand:DIDFVM 0 "nonimmediate_operand" "") + (match_operand:DIDFVM 1 "general_operand" ""))] + "reload_completed + && !((MEM_P (operands[0]) && TARGET_STDW) + || (MEM_P (operands[1]) && TARGET_LDDW)) + && !const_vector_operand (operands[1], mode) + && !(TARGET_INSNS_64PLUS && REG_P (operands[0]) && REG_P (operands[1]) + && A_REGNO_P (REGNO (operands[0])) == A_REGNO_P (REGNO (operands[1])))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + /* We can't have overlap for a register-register move, but if + memory is involved, we have to make sure we don't clobber the + address. 
*/ + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + +(define_insn "real_load" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (match_operand:M32 1 "memory_operand" "Q,R,R,Q")] + UNSPEC_REAL_LOAD)] + "" + "%|%.\\tld\\t%$\\t%1, %k0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "real_load" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (match_operand:DIDFVM 1 "memory_operand" "Q,R,R,Q")] + UNSPEC_REAL_LOAD)] + "TARGET_LDDW" + "%|%.\\tlddw\\t%$\\t%1, %K0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "load_shadow" + [(set (match_operand 0 "register_operand" "=ab") + (unspec [(pc)] UNSPEC_LOAD_SHADOW))] + "" + ";; load to %0 occurs" + [(set_attr "type" "load_shadow")]) + +(define_insn "mult_shadow" + [(set (match_operand 0 "register_operand" "=ab") + (unspec [(pc)] UNSPEC_MULT_SHADOW))] + "" + ";; multiplication occurs and stores to %0" + [(set_attr "type" "mult_shadow")]) + + +(define_mode_iterator MOV [QI HI SI SF DI DF V2HI V4QI V2SI V4HI V8QI]) + +(define_expand "mov" + [(set (match_operand:MOV 0 "nonimmediate_operand" "") + (match_operand:MOV 1 "general_operand" ""))] + "" +{ + if (expand_move (operands, mode)) + DONE; +}) + +(define_expand "movmisalign" + [(set (match_operand:SIDIVM 0 "nonimmediate_operand" "") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "nonimmediate_operand" "")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_INSNS_64" +{ + if (memory_operand (operands[0], mode)) + { + emit_insn (gen_movmisalign_store (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "movmisalign_store" + [(set (match_operand:SIDIVM 0 "memory_operand" "=W,Q,T,Q,T") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "register_operand" "r,a,b,b,a")] + UNSPEC_MISALIGNED_ACCESS)) + (clobber (match_scratch:SI 2 "=r,X,X,X,X"))] + "TARGET_INSNS_64" + "@ + # + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0" + "&& reload_completed && satisfies_constraint_W (operands[0])" + [(parallel + [(set (match_dup 3) (unspec:SIDIVM [(match_dup 1)] UNSPEC_MISALIGNED_ACCESS)) + (clobber (match_dup 4))])] +{ + rtx addr = XEXP (operands[0], 0); + rtx tmpreg = operands[2]; + + if (GET_CODE (addr) == PLUS && XEXP (addr, 0) == stack_pointer_rtx + && GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); + val &= GET_MODE_SIZE (mode) - 1; + if (val == 0) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + } + operands[3] = change_address (operands[0], mode, tmpreg); + emit_move_insn (tmpreg, addr); + operands[4] = gen_rtx_SCRATCH (SImode); +} + [(set_attr "type" "storen") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "*,a,b,a,b") + (set_attr "dest_regfile" "*,a,b,b,a") + (set_attr "cross" "*,n,n,y,y")]) + +(define_insn_and_split "movmisalign_load" + [(set (match_operand:SIDIVM 0 "register_operand" "=ab,a,b,b,a") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "memory_operand" "W,Q,T,Q,T")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_INSNS_64" + "@ + # + 
%|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0" + "&& reload_completed && satisfies_constraint_W (operands[1])" + [(set (match_dup 0) (unspec:SIDIVM [(match_dup 2)] UNSPEC_MISALIGNED_ACCESS))] +{ + rtx addr = XEXP (operands[1], 0); + rtx tmpreg = (GET_MODE (operands[0]) == SImode ? operands[0] + : operand_subword_force (operands[0], 0, DImode)); + + if (GET_CODE (addr) == PLUS && XEXP (addr, 0) == stack_pointer_rtx + && GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); + val &= GET_MODE_SIZE (mode) - 1; + if (val == 0) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + } + operands[2] = change_address (operands[1], mode, tmpreg); + emit_move_insn (tmpreg, addr); +} + [(set_attr "type" "loadn") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "*,a,b,a,b") + (set_attr "dest_regfile" "*,a,b,b,a") + (set_attr "cross" "*,n,n,y,y")]) + +;; + +;; ------------------------------------------------------------------------- +;; Extensions/extractions +;; ------------------------------------------------------------------------- + +(define_code_iterator any_extract [zero_extract sign_extract]) +(define_code_iterator any_ext [zero_extend sign_extend]) + +(define_code_attr ext_name [(zero_extend "zero_extend") (sign_extend "sign_extend")]) + +(define_code_attr u [(zero_extend "u") (sign_extend "")]) + +(define_code_attr z [(zero_extract "z") (sign_extract "")]) +(define_code_attr zu [(zero_extract "u") (sign_extract "")]) + +(define_mode_attr ext_shift [(QI "24") (HI "16")]) + +(define_insn "si2" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,?a, b,?b") + (any_ext:SI (match_operand:QIHIM 1 "nonimmediate_operand" "a,b,Q, R, R, Q")))] + "" + "@ + %|%.\\text\\t%$\\t%1, , , %0 + %|%.\\text\\t%$\\t%1, , , %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0" + [(set_attr "type" "*,*,load,load,load,load") + (set_attr "units" "s,s,d_addr,d_addr,d_addr,d_addr") + (set_attr "addr_regfile" "*,*,a,b,b,a") + (set_attr "dest_regfile" "*,*,a,a,b,b") + (set_attr "cross" "n,n,n,y,n,y")]) + +(define_insn "*extv_const" + [(set (match_operand:SI 0 "nonimmediate_operand" "=a,b") + (any_extract:SI (match_operand:SI 1 "register_operand" "a,b") + (match_operand:SI 2 "const_int_operand" "n,n") + (match_operand:SI 3 "const_int_operand" "n,n")))] + "INTVAL (operands[3]) >= 0 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32" +{ + int pos = INTVAL (operands[3]); + int len = INTVAL (operands[2]); + rtx xop[4]; + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (32 - pos - len); + xop[3] = GEN_INT (32 - len); + + output_asm_insn ("%|%.\\text\\t%$\\t%1, %2, %3, %0", xop); + return ""; +} + [(set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (any_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" +{ + if (INTVAL (operands[2]) < 0 + || INTVAL (operands[2]) + INTVAL (operands[3]) > 32) + FAIL; +}) + +(define_insn "real_" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (any_ext:SI (match_operand:QIHIM 1 "memory_operand" "Q,R,R,Q"))] + UNSPEC_REAL_LOAD)] + "" + "%|%.\\tld\\t%$\\t%1, %k0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr 
"cross" "n,y,n,y")]) + +(define_insn "clrr" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "0,0,0,0") + (match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_CLR))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\tclr\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\tclr\\t%$\\t%2, %3, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "extr" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_EXT))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\text\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\text\\t%$\\t%1, %2, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "extru" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_EXTU))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\textu\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\textu\\t%$\\t%1, %2, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,y,n,y")]) + +;; ------------------------------------------------------------------------- +;; Compare instructions +;; ------------------------------------------------------------------------- + +(define_insn "scmpsi_insn" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_scst5_operand" "Is5,aIs5,bIs5,aIs5,bIs5")]))] + "" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_insn "*ucmpsi_insn_64" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst5_operand" "Iu5,aIu5,bIu5,aIu5,bIu5")]))] + "TARGET_INSNS_64" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_insn "*ucmpsi_insn" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst4_operand" "Iu4,aIu4,bIu4,aIu4,bIu4")]))] + "!TARGET_INSNS_64" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_code_iterator andior_eqne [eq ne]) 
+(define_code_attr andior_name [(eq "and") (ne "ior")]) +(define_code_attr andior_condmod [(eq "") (ne "!")]) + +(define_insn "*scmpsi__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5")])))] + "" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_insn "*ucmpsi__insn_64" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")])))] + "TARGET_INSNS_64" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_insn "*ucmpsi__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst4_operand" "aIu4,bIu4,aIu4,bIu4")])))] + "!TARGET_INSNS_64" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_expand "cmpsi_" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "c6x_comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_const_int_operand" "")])))] + "" +{ + if (c6x_force_op_for_comparison_p (GET_CODE (operands[1]), operands[3])) + operands[3] = force_reg (SImode, operands[3]); +}) + +(define_insn "*cmpsf_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SF 2 "register_operand" "a,b,a,b") + (match_operand:SF 3 "register_operand" "a,b,?b,?a")]))] + "TARGET_FP" + "%|%.\\tcmp%c1sp\\t%$\\t%2, %3, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "*cmpdf_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:DF 2 "register_operand" "a,b,a,b") + (match_operand:DF 3 "register_operand" "a,b,?b,?a")]))] + "TARGET_FP" + "%|%.\\tcmp%c1dp\\t%$\\t%2, %3, %0" + [(set_attr "type" "cmpdp") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "cmp_" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SFDFM 2 "register_operand" "") + (match_operand:SFDFM 3 "register_operand" "")])))] + "TARGET_FP") + +(define_insn "*cmpsf__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SF 2 "register_operand" 
"a,b,a,b") + (match_operand:SF 3 "register_operand" "a,b,?b,?a")])))] + "TARGET_FP" + "%|[%4]\\tcmp%c1sp\\t%$\\t%2, %3, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +;; reload_reg_class_lower will ensure that two-word reloads are allocated first, +;; which could exhaust the predicate registers if we used just "a" and "b" +;; constraints on operands 2 and 3. +(define_insn "*cmpdf__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:DF 2 "register_operand" "Da,Db,Da,Db") + (match_operand:DF 3 "register_operand" "Da,Db,?Db,?Da")])))] + "TARGET_FP" + "%|[%4]\\tcmp%c1dp\\t%$\\t%2, %3, %0" + [(set_attr "type" "cmpdp") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand 1 "c6x_any_comparison_operand" "") + (match_operand 2 "c6x_any_comparison_operand" "")))] + "!reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (if_then_else:SI (ne:SI (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 2)))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand 1 "c6x_any_comparison_operand" "") + (match_operand 2 "c6x_any_comparison_operand" "")))] + "!reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (if_then_else:SI (eq:SI (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 2)))]) + + +;; ------------------------------------------------------------------------- +;; setcc instructions +;; ------------------------------------------------------------------------- + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_ucst4_operand" "")]))] + "" +{ + if (!c6x_comparison_operator (operands[1], SImode)) + { + rtx tmpreg = gen_reg_rtx (SImode); + rtx t = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[1])), + SImode, operands[2], operands[3]); + emit_insn (gen_rtx_SET (VOIDmode, tmpreg, t)); + emit_insn (gen_scmpsi_insn (operands[0], + gen_rtx_fmt_ee (EQ, SImode, tmpreg, const0_rtx), + tmpreg, const0_rtx)); + DONE; + } +}) + +;; ------------------------------------------------------------------------- +;; Jump instructions +;; ------------------------------------------------------------------------- + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "a,b"))] + "" + "%|%.\\tb\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "cross" "y,n") + (set_attr "dest_regfile" "b")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "%|%.\\tb\\t%$\\t%l0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "!flag_pic || !TARGET_INSNS_64" +{ +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:SI 0 "register_operand" "b")) + (use (label_ref (match_operand 1 "" "")))] + "!flag_pic || !TARGET_INSNS_64" + "%|\\tb\\t%$\\t%0" + [(set_attr "type" "branch") 
+ (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +;; Implement switch statements when generating PIC code. Switches are +;; implemented by `tablejump' when not using -fpic. + +;; Emit code here to do the range checking and make the index zero based. +;; operand 0 is the index +;; operand 1 is the lower bound +;; operand 2 is the range of indices (highest - lowest + 1) +;; operand 3 is the label that precedes the table itself +;; operand 4 is the fall through label + +(define_expand "casesi" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "const_int_operand" "")) + (use (match_operand:SI 2 "const_int_operand" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "flag_pic && TARGET_INSNS_64" +{ + rtx indx; + rtx low = operands[1]; + rtx range = operands[2]; + rtx table = operands[3]; + rtx fail = operands[4]; + + gcc_assert (GET_CODE (operands[1]) == CONST_INT); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + + if (!reg_or_ucst4_operand (range, SImode)) + range = force_reg (SImode, range); + + /* If low bound is 0, we don't have to subtract it. */ + if (INTVAL (operands[1]) == 0) + indx = operands[0]; + else + { + rtx offset = GEN_INT (-INTVAL (low)); + indx = gen_reg_rtx (SImode); + if (!addsi_operand (offset, SImode)) + offset = force_reg (SImode, offset); + emit_insn (gen_addsi3 (indx, operands[0], offset)); + } + emit_cmp_and_jump_insns (indx, range, GTU, NULL_RTX, SImode, 1, fail); + + emit_jump_insn (gen_casesi_internal (indx, table)); + DONE; +}) + +;; This is the only instance in this file where a pattern emits more than +;; one instruction. The concern here is that the addkpc insn could otherwise +;; be scheduled too far away from the label. A tablejump always ends an +;; extended basic block, so it shouldn't happen that the scheduler places +;; something in the delay slots. 
+(define_insn "casesi_internal" + [(set (pc) + (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "b") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:SI 2 "=&b")) + (clobber (match_scratch:SI 3 "=b"))] + "flag_pic && TARGET_INSNS_64" + "addkpc\t.s2\t%l1,%2, 0\n\t\tldw\t.d2t2\t*+%2[%0], %3\n\t\tnop\t\t4\n\t\tadd\t.l2\t%2, %3, %3\n\t\tb\t.s2\t%3" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "dest_regfile" "b")]) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SIDIM 1 "register_operand" "") + (match_operand:SIDIM 2 "reg_or_const_int_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = c6x_expand_compare (operands[0], VOIDmode); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "c6x_fp_comparison_operator" + [(match_operand:SFDFM 1 "register_operand" "") + (match_operand:SFDFM 2 "register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = c6x_expand_compare (operands[0], VOIDmode); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_insn "br_true" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:SI 1 "register_operand" "AB") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "%|[%J0]\\tb\\t%$\\t%l2" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_insn "br_false" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:SI 1 "register_operand" "AB") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "%|[%j0]\\tb\\t%$\\t%l2" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_expand "return" + [(parallel + [(return) + (use (reg:SI REG_B3))])] + "reload_completed && get_frame_size () == 0 && c6x_nsaved_regs () == 0") + +;; We can't expand this before we know where the link register is stored. +(define_insn_and_split "eh_return" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "ab")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:SI 1 "=&ab"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + c6x_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +;; ------------------------------------------------------------------------- +;; Doloop +;; ------------------------------------------------------------------------- + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (match_dup 2))])] ; match_scratch + "TARGET_INSNS_64PLUS && optimize" +{ + /* The loop optimizer doesn't check the predicates... 
*/ + if (GET_MODE (operands[0]) != SImode) + FAIL; + operands[2] = gen_rtx_SCRATCH (SImode); +}) + +(define_insn "mvilc" + [(set (reg:SI REG_ILC) + (unspec [(match_operand:SI 0 "register_operand" "a,b")] UNSPEC_MVILC))] + "TARGET_INSNS_64PLUS" + "%|%.\\tmvc\\t%$\\t%0, ILC" + [(set_attr "predicable" "no") + (set_attr "cross" "y,n") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "type" "mvilc")]) + +(define_insn "sploop" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (reg:SI REG_ILC)] + UNSPECV_SPLOOP)] + "TARGET_INSNS_64PLUS" + "%|%.\\tsploop\t%0" + [(set_attr "predicable" "no") + (set_attr "type" "sploop")]) + +(define_insn "spkernel" + [(set (pc) + (if_then_else + (ne (unspec_volatile:SI + [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:SI 1 "const_int_operand" "i")] + UNSPECV_SPKERNEL) + (const_int 1)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_INSNS_64PLUS" + "%|%.\\tspkernel\t%0, %1" + [(set_attr "predicable" "no") + (set_attr "type" "spkernel")]) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "0,0,0,*r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=AB,*r,m,m") + (plus:SI (match_dup 3) + (const_int -1))) + (clobber (match_scratch:SI 2 "=X,&AB,&AB,&AB"))] + "TARGET_INSNS_64PLUS && optimize" + "#" + [(set_attr "type" "spkernel")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "memory_operand" "") + (plus:SI (match_dup 3) + (const_int -1))) + (clobber (match_scratch 2))] + "" + [(set (match_dup 2) (plus:SI (match_dup 3) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (pc) + (if_then_else (ne (match_dup 2) (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] +{ + if (!REG_P (operands[3])) + { + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; + } +}) + +;; ------------------------------------------------------------------------- +;; Delayed-branch real jumps and shadows +;; ------------------------------------------------------------------------- + +(define_insn "real_jump" + [(unspec [(match_operand 0 "c6x_jump_operand" "a,b,s") (const_int 0)] + UNSPEC_REAL_JUMP)] + "" +{ + if (GET_CODE (operands[0]) == LABEL_REF) + return "%|%.\\tb\\t%$\\t%l0"; + return "%|%.\\tb\\t%$\\t%0"; +} + [(set_attr "type" "branch") + (set_attr "has_shadow" "y") + (set_attr "units" "s") + (set_attr "cross" "y,n,n") + (set_attr "dest_regfile" "b,b,any")]) + +(define_insn "real_call" + [(unspec [(match_operand 0 "c6x_call_operand" "a,b,S1") (const_int 1)] + UNSPEC_REAL_JUMP) + (clobber (reg:SI REG_B3))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "has_shadow" "y") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "cross" "y,n,n") + (set_attr "dest_regfile" "b,b,any")]) + +(define_insn "real_ret" + [(unspec [(match_operand 0 "register_operand" "a,b") (const_int 2)] + UNSPEC_REAL_JUMP)] + "" + "%|%.\\tret\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "has_shadow" "y") + (set_attr "units" "s") + (set_attr "cross" "y,n") + (set_attr "dest_regfile" "b")]) + +;; computed_jump_p returns true if it finds a constant; so use one in the +;; unspec. 
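The counter protocol used by the doloop_end and loop_end patterns earlier in this file is easy to misread: the RTL compares the counter against 1 before the parallel decrement. A minimal C sketch of the equivalent loop shape follows; it is illustrative only and assumes the counter is at least 1 on entry.

    void run_loop (unsigned int count)   /* assumed count >= 1 */
    {
      do
        {
          /* loop body */
        }
      while (--count != 0);   /* same test as "counter != 1" taken on the
                                 value before the decrement */
    }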
+(define_insn "indirect_jump_shadow" + [(set (pc) (unspec [(const_int 1)] UNSPEC_JUMP_SHADOW))] + "" + ";; indirect jump occurs" + [(set_attr "type" "shadow")]) + +;; Operand 0 may be a PARALLEL which isn't handled by output_operand, so +;; we don't try to print it. +(define_insn "indirect_call_value_shadow" + [(set (match_operand 0 "" "") + (call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0)))] + "" + ";; indirect call occurs, with return value" + [(set_attr "type" "shadow")]) + +(define_insn "indirect_sibcall_shadow" + [(call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "SIBLING_CALL_P (insn)" + ";; indirect sibcall occurs" + [(set_attr "type" "shadow")]) + +(define_insn "indirect_call_shadow" + [(call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "" + ";; indirect call occurs" + [(set_attr "type" "shadow")]) + +(define_insn "call_value_shadow" + [(set (match_operand 0 "" "") + (call (unspec [(match_operand 1 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0)))] + "" + ";; call to %1 occurs, with return value" + [(set_attr "type" "shadow")]) + +(define_insn "call_shadow" + [(call (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "!SIBLING_CALL_P (insn)" + ";; call to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "sibcall_shadow" + [(call (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "SIBLING_CALL_P (insn)" + ";; sibcall to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "jump_shadow" + [(set (pc) (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW))] + "" + ";; jump to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "condjump_shadow" + [(set (pc) + (if_then_else (eq (unspec [(const_int 0)] UNSPEC_JUMP_SHADOW) + (const_int 0)) + (match_operand 0 "" "") + (pc)))] + "" + ";; condjump to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "return_shadow" + [(unspec [(const_int 0)] UNSPEC_JUMP_SHADOW) + (return)] + "" + ";; return occurs" + [(set_attr "type" "shadow")]) + +;; ------------------------------------------------------------------------- +;; Add instructions +;; ------------------------------------------------------------------------- + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" + "=a ,b , a, b, a, b, a, b, ab, a, b, a, b,ab") + (plus:SI (match_operand:SI 1 "register_operand" + "%a ,b , a, b, b, a, b, a, 0, a, b, z, z,0") + (match_operand:SI 2 "addsi_operand" + "aIs5,bIs5,?b,?a,?a,?b,?aIs5,?bIs5,I5x,I5x,I5x,Iux,Iux,IsB")))] + "" +{ + if (CONSTANT_P (operands[2])) + { + HOST_WIDE_INT val = INTVAL (operands[2]); + + if (c6x_get_unit_specifier (insn) == 'd') + { + bool issp = (TARGET_INSNS_64PLUS + && operands[1] == stack_pointer_rtx + && GET_CODE (PATTERN (insn)) != COND_EXEC); + + if (get_attr_cross (insn) == CROSS_N) + { + if (satisfies_constraint_Iu5 (operands[2])) + return "%|%.\\tadd\\t%$\\t%1, %2, %0"; + else if (satisfies_constraint_In5 (operands[2])) + return "%|%.\\tsub\\t%$\\t%1, %n2, %0"; + } + + if (issp && val > 0 && val < 32768) + { + return "%|%.\\taddab\\t%$\\t%1, %2, %0"; + } + if ((val & 1) == 0 && ((val >= -62 && val <= 62) + || (issp && val > 0 && val < 65536))) + { + if (val < 0) + return "%|%.\\tsubah\\t%$\\t%1, %r2, %0"; + else + return "%|%.\\taddah\\t%$\\t%1, %r2, %0"; + } + else if ((val & 3) == 0 && ((val >= -124 && val <= 124) + || (issp && val > 0 && val < 131072))) + { + if (val < 0) + return "%|%.\\tsubaw\\t%$\\t%1, %R2, %0"; + else + return "%|%.\\taddaw\\t%$\\t%1, %R2, %0"; + } + else if ((val & 7) == 0 && val > 0 
&& val <= 248) + { + rtx xop[3]; + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (val >> 3); + output_asm_insn ("%|%.\\taddad\\t%$\\t%1, %2, %0", xop); + return ""; + } + } + else + { + if (satisfies_constraint_Is5 (operands[2])) + return "%|%.\\tadd\\t%$\\t%2, %1, %0"; + } + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "%|%.\\taddk\\t%$\\t%2, %0"; + } + if (which_alternative == 4 || which_alternative == 5) + return "%|%.\\tadd\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tadd\\t%$\\t%1, %2, %0"; +} + [(set_attr "units62" "dls,dls,ls,ls,ls,ls,ls,ls,s,d,d,*,*,s") + (set_attr "units67" "dls,dls,ls,ls,ls,ls,ls,ls,ds,d,d,*,*,s") + (set_attr "units64" "dls,dls,dls,dls,dls,dls,ls,ls,ds,d,d,d,d,s") + (set_attr "cross" "n,n,y,y,y,y,y,y,n,n,n,y,n,n") + (set_attr "predicable" "yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,no,no,yes")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (minus:SI (match_operand:SI 1 "reg_or_scst5_operand" "a,b,aIs5,bIs5,bIs5,aIs5") + (match_operand:SI 2 "register_operand" "a,b,a,b,?a,?b")))] + "" + "%|%.\\tsub\\t%$\\t%1, %2, %0" + [(set_attr "units62" "dls,dls,ls,ls,l,l") + (set_attr "units64" "dls,dls,ls,ls,ls,ls") + (set_attr "cross" "n,n,n,n,y,y")]) + +(define_insn "*addshiftsi" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "a,b") + (match_operand:SI 3 "adda_scale_operand" "n,n")) + (match_operand:SI 1 "register_operand" "a,b")))] + "" + "%|%.\\tadda%d3\\t%$\\t%1, %2, %0" + [(set_attr "units" "d")]) + +(define_insn "*subshiftsi" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (minus:SI (match_operand:SI 1 "register_operand" "a,b") + (mult:SI (match_operand:SI 2 "register_operand" "a,b") + (match_operand:SI 3 "suba_scale_operand" "n,n"))))] + "" + "%|%.\\tsuba%d3\\t%$\\t%1, %2, %0" + [(set_attr "units" "d")]) + +(define_insn "addsidi3_widen" + [(set (match_operand:DI 0 "register_operand" "=a,b,a,b") + (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%a,b,a,b")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "a,b,?b,?a"))))] + "" + "%|%.\\taddu\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "" +{ + rtx tmp; + rtx lo_half[3], hi_half[3]; + split_di (operands + 1, 2, lo_half + 1, hi_half + 1); + if (reg_overlap_mentioned_p (operands[0], hi_half[1]) + || reg_overlap_mentioned_p (operands[0], hi_half[2])) + tmp = gen_reg_rtx (DImode); + else + tmp = operands[0]; + split_di (&tmp, 1, lo_half, hi_half); + emit_insn (gen_addsidi3_widen (tmp, lo_half[1], lo_half[2])); + emit_insn (gen_addsi3 (hi_half[0], copy_rtx (hi_half[0]), hi_half[1])); + emit_insn (gen_addsi3 (copy_rtx (hi_half[0]), + copy_rtx (hi_half[0]), hi_half[2])); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + DONE; +}) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (plus:SF (match_operand:SF 1 "register_operand" "%a,b,a,b") + (match_operand:SF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\taddsp\\t%$\\t%1, %2, %0" + [(set_attr "type" "fp4") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (plus:DF 
(match_operand:DF 1 "register_operand" "%a,b,a,b") + (match_operand:DF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tadddp\\t%$\\t%1, %2, %0" + [(set_attr "type" "adddp") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b, a, b, a, b") + (minus:SF (match_operand:SF 1 "register_operand" "a,b, b, a, a, b") + (match_operand:SF 2 "register_operand" "a,b,?a,?b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsubsp\\t%$\\t%1, %2, %0" + [(set_attr "type" "fp4") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y,y,y")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=a,b, a, b, a, b") + (minus:DF (match_operand:DF 1 "register_operand" "a,b, b, a, a, b") + (match_operand:DF 2 "register_operand" "a,b,?a,?b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsubdp\\t%$\\t%1, %2, %0" + [(set_attr "type" "adddp") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y,y,y")]) + +;; ------------------------------------------------------------------------- +;; Logical instructions +;; ------------------------------------------------------------------------- + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (and:SI (match_operand:SI 1 "register_operand" "%a,b,b,a,a,b") + (match_operand:SI 2 "andsi_operand" "aIs5,bIs5,?aIs5,?bIs5,aJc,bJc")))] + "" +{ + if (which_alternative < 4) + return "%|%.\\tand\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tclr\\t%$\\t%1, %f2, %F2, %0"; +} + [(set_attr "units62" "ls,ls,ls,ls,s,s") + (set_attr "units64" "dls,dls,dls,dls,s,s") + (set_attr "cross" "n,n,y,y,n,n")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (ior:SI (match_operand:SI 1 "register_operand" "%a,b,b,a,a,b") + (match_operand:SI 2 "iorsi_operand" "aIs5,bIs5,?aIs5,?bIs5,aJs,bJs")))] + "" +{ + if (which_alternative < 4) + return "%|%.\\tor\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tset\\t%$\\t%1, %s2, %S2, %0"; +} + [(set_attr "units62" "ls,ls,ls,ls,s,s") + (set_attr "units64" "dls,dls,dls,dls,s,s") + (set_attr "cross" "n,n,y,y,n,n")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (xor:SI (match_operand:SI 1 "register_operand" "%a,b,b,a") + (match_operand:SI 2 "reg_or_scst5_operand" "aIs5,bIs5,?aIs5,?bIs5")))] + "" + "%|%.\\txor\\t%$\\t%2, %1, %0" + [(set_attr "units62" "ls") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Conversions +;; ------------------------------------------------------------------------- + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (float_extend:DF (match_operand:SF 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tspdp\\t%$\\t%1,%0" + [(set_attr "type" "dp2") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=a,b") + (float_truncate:SF (match_operand:DF 1 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tdpsp\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n")]) + +;;;; Convert between signed integer types and floating point. 
+(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (float:SF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintsp\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (unsigned_float:SF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintspu\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (float:DF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintdp\\t%$\\t%1,%0" + [(set_attr "type" "intdp") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (unsigned_float:DF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintdpu\\t%$\\t%1,%0" + [(set_attr "type" "intdp") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (fix:SI (match_operand:SF 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsptrunc\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "fix_truncdfsi2" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (fix:SI (match_operand:DF 1 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tdptrunc\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n")]) + +;; ------------------------------------------------------------------------- +;; Saturating arithmetic +;; ------------------------------------------------------------------------- + +(define_insn "saddsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b,a,b") + (ss_plus:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a,a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "a,b,a,b,aIs5,bIs5,aIs5,bIs5")))] + "" + "%|%.\\tsadd\\t%$\\t%2, %1, %0" + [(set_attr "units" "ls,ls,ls,ls,l,l,l,l") + (set_attr "cross" "n,n,y,y,n,n,y,y")]) + +(define_insn "ssubsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (ss_minus:SI (match_operand:SI 1 "reg_or_scst5_operand" "aIs5,bIs5,?bIs5,?aIs5") + (match_operand:SI 2 "register_operand" "a,b,a,b")))] + "" + "%|%.\\tssub\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "subcsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI + [(match_operand:SI 1 "register_operand" "a,b,a,b") + (match_operand:SI 2 "register_operand" "a,b,?b,?a")] + UNSPEC_SUBC))] + "" + "%|%.\\tsubc\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Call instructions +;; ------------------------------------------------------------------------- + +(define_expand "call" + [(match_operand 0 "" "")] + "" +{ + c6x_expand_call (NULL_RTX, operands[0], false); + DONE; +}) + +(define_expand "call_value" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" +{ + c6x_expand_call (operands[0], operands[1], false); + DONE; +}) + +(define_expand "sibcall" + [(match_operand 0 "" "")] + "" +{ + c6x_expand_call (NULL_RTX, operands[0], true); + cfun->machine->contains_sibcall = true; + DONE; +}) 
+ +(define_expand "sibcall_value" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" +{ + c6x_expand_call (operands[0], operands[1], true); + cfun->machine->contains_sibcall = true; + DONE; +}) + +(define_insn "call_internal" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1,a,b")) + (const_int 0))] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b,b") + (set_attr "cross" "n,y,n")]) + +(define_insn "call_value_internal" + [(set (match_operand 0 "" "") + (call (mem (match_operand:SI 1 "c6x_call_operand" "S1,a,b")) + (const_int 0)))] + "" + "%|%.\\tcall\\t%$\\t%1" + [(set_attr "type" "call") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b,b") + (set_attr "cross" "n,y,n")]) + +(define_insn "sibcall_internal" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1,C")) + (const_int 0))] + "SIBLING_CALL_P (insn)" + "%|%.\\tb\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b")]) + +(define_insn "callp" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1")) + (const_int 0)) + (unspec [(const_int 6)] UNSPEC_NOP)] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcallp\\t%$\\t%0, B3" + [(set_attr "type" "callp") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "cross" "n")]) + +(define_insn "callp_value" + [(set (match_operand:SI 0 "register_operand" "") + (call (mem (match_operand:SI 1 "c6x_call_operand" "S1")) + (const_int 0))) + (unspec [(const_int 6)] UNSPEC_NOP)] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcallp\\t%$\\t%1, B3" + [(set_attr "type" "callp") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "cross" "n")]) + +(define_insn "return_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "b"))] + "reload_completed" + "%|%.\\tret\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +(define_insn "addkpc" + [(set (match_operand:SI 0 "register_operand" "=b") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_ADDKPC)) + (unspec [(match_operand 2 "const_int_operand" "n")] UNSPEC_NOP)] + "TARGET_INSNS_64" + "%|%.\\taddkpc\\t%$\\t%l1, %0, %2" + [(set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +;; ------------------------------------------------------------------------- +;; Unary operations +;; ------------------------------------------------------------------------- + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (neg:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tneg\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (not:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tnot\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "clrsbsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (clrsb:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tnorm\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (clz:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tlmbd\\t%$\\t1, %1, 
%0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; bitrevsi2 is defined in c6x-mult.md.in. + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (SImode); + emit_insn (gen_bitrevsi2 (tmpreg, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmpreg)); + DONE; +}) + +(define_expand "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "") + (ctz:DI (match_operand:DI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (DImode); + rtx out; + emit_insn (gen_bitrevsi2 (gen_highpart (SImode, tmpreg), + gen_lowpart (SImode, operands[1]))); + emit_insn (gen_bitrevsi2 (gen_lowpart (SImode, tmpreg), + gen_highpart (SImode, operands[1]))); + out = expand_unop (DImode, clz_optab, tmpreg, operands[0], 1); + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], out); + DONE; +}) + +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (ss_abs:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tabs\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; ------------------------------------------------------------------------- +;; Shift instructions +;; ------------------------------------------------------------------------- + +(define_code_iterator any_shift [ss_ashift ashift ashiftrt lshiftrt]) +(define_code_iterator any_rshift [ashiftrt lshiftrt]) +(define_code_attr shift_code [(ss_ashift "ss_ashl") (ashift "ashl") + (ashiftrt "ashr") (lshiftrt "lshr")]) +(define_code_attr shift_insn [(ss_ashift "sshl") (ashift "shl") + (ashiftrt "shr") (lshiftrt "shru")]) + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (any_shift:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "" + "%|%.\\t\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; See c6x-mult.md.in for the rotlsi3 pattern. 
+ +(define_insn "rotrdi3_16" + [(set (match_operand:DI 0 "register_operand" "=a,b") + (rotatert:DI (match_operand:DI 1 "register_operand" "a,b") + (const_int 16)))] + "TARGET_INSNS_64PLUS" + "%|%.\\tdpackx2\\t%$\\t%P1, %p1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n")]) + +(define_insn "shlmbsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (const_int 8)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "a,b,a,b") + (const_int 24))))] + "TARGET_INSNS_64" + "%|%.\\tshlmb\\t%$\\t%2, %1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_INSNS_64" +{ + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 8) + { + rtx lo0, lo1, hi0, hi1, tmp; + lo0 = gen_lowpart (SImode, operands[0]); + hi0 = gen_highpart (SImode, operands[0]); + lo1 = gen_lowpart (SImode, operands[1]); + hi1 = gen_highpart (SImode, operands[1]); + if (reg_overlap_mentioned_p (hi0, lo1)) + tmp = gen_reg_rtx (SImode); + else + tmp = hi0; + emit_insn (gen_shlmbsi3 (tmp, hi1, lo1)); + emit_insn (gen_ashlsi3 (lo0, lo1, operands[2])); + if (tmp != hi0) + emit_move_insn (hi0, tmp); + DONE; + } + FAIL; +}) + +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (rotatert:DI (match_operand:DI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_INSNS_64PLUS" +{ + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 16) + { + emit_insn (gen_rotrdi3_16 (operands[0], operands[1])); + DONE; + } + FAIL; +}) + +(define_insn "bswapv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (bswap:V2HI (match_operand:V2HI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tswap4\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (SImode); + rtx tmpv2 = gen_lowpart (V2HImode, tmpreg); + rtx op0v2 = gen_lowpart (V2HImode, operands[0]); + emit_insn (gen_rotlsi3 (tmpreg, operands[1], GEN_INT (16))); + emit_insn (gen_bswapv2hi2 (op0v2, tmpv2)); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Division +;; ------------------------------------------------------------------------- + +(define_insn "divsi3_insn" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divsi3_insn_indcall" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) 
+ (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivsi3_insn" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivsi3_insn_indcall" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "modsi3_insn" + [(set (reg:SI REG_A4) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_remi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "modsi3_insn_indcall" + [(set (reg:SI REG_A4) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divmodsi4_insn" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divremi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divmodsi4_insn_indcall" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI 
REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "umodsi3_insn" + [(set (reg:SI REG_A4) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_remu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "umodsi3_insn_indcall" + [(set (reg:SI REG_A4) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivmodsi4_insn" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divremu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivmodsi4_insn_indcall" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn_and_split "divmodsi4" + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A4)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "#" + "" + [(const_int 0)] +{ + rtx reg = 
NULL_RTX; + + if (TARGET_LONG_CALLS) + { + if (reload_completed) + reg = gen_rtx_REG (SImode, REG_A6); + else + reg = gen_reg_rtx (SImode); + } + emit_move_insn (gen_rtx_REG (SImode, REG_A4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, REG_B4), operands[2]); + if (find_reg_note (curr_insn, REG_UNUSED, operands[3])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (sdiv_optab, SImode)); + emit_insn (gen_divsi3_insn_indcall (reg)); + } + else + emit_insn (gen_divsi3_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + } + else if (find_reg_note (curr_insn, REG_UNUSED, operands[0])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (smod_optab, SImode)); + emit_insn (gen_modsi3_insn_indcall (reg)); + } + else + emit_insn (gen_modsi3_insn ()); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A4)); + } + else + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (sdivmod_optab, SImode)); + emit_insn (gen_divmodsi4_insn_indcall (reg)); + } + else + emit_insn (gen_divmodsi4_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A5)); + } + DONE; +}) + +(define_insn_and_split "udivmodsi4" + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A4)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "#" + "" + [(const_int 0)] +{ + rtx reg = NULL_RTX; + + if (TARGET_LONG_CALLS) + { + if (reload_completed) + reg = gen_rtx_REG (SImode, REG_A6); + else + reg = gen_reg_rtx (SImode); + } + + emit_move_insn (gen_rtx_REG (SImode, REG_A4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, REG_B4), operands[2]); + if (find_reg_note (curr_insn, REG_UNUSED, operands[3])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (udiv_optab, SImode)); + emit_insn (gen_udivsi3_insn_indcall (reg)); + } + else + emit_insn (gen_udivsi3_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + } + else if (find_reg_note (curr_insn, REG_UNUSED, operands[0])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (umod_optab, SImode)); + emit_insn (gen_umodsi3_insn_indcall (reg)); + } + else + emit_insn (gen_umodsi3_insn ()); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A4)); + } + else + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (udivmod_optab, SImode)); + emit_insn (gen_udivmodsi4_insn_indcall (reg)); + } + else + emit_insn (gen_udivmodsi4_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A5)); + } + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; See c6x-mult.md.in for define_insn patterns. 
+;; ------------------------------------------------------------------------- + +(define_expand "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "reg_or_scst5_operand" ""))))] + "" +{ + if (CONSTANT_P (operands[2])) + { + emit_insn (gen_mulhisi3_const (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_expand "usmulhisi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "reg_or_scst5_operand" ""))))] + "" +{ + if (CONSTANT_P (operands[2])) + { + emit_insn (gen_usmulhisi3_const (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" +{ + if (!TARGET_MPY32) + { + rtx lo1 = gen_lowpart (HImode, operands[1]); + rtx lo2 = gen_lowpart (HImode, operands[2]); + /* (N * AH + AL) * (N * BH + BL) + = N*(AH * BL + BH * AL) + AL*BL */ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + emit_insn (gen_umulhisi3 (tmp1, lo1, lo2)); + emit_insn (gen_umulhisi3_lh (tmp2, lo1, operands[2])); + emit_insn (gen_umulhisi3_hl (tmp3, operands[1], lo2)); + emit_insn (gen_addsi3 (tmp2, tmp2, tmp3)); + emit_insn (gen_ashlsi3 (tmp2, tmp2, GEN_INT (16))); + emit_insn (gen_addsi3 (operands[0], tmp1, tmp2)); + DONE; + } +}) + +;; ------------------------------------------------------------------------- +;; Floating point multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (mult:SF (match_operand:SF 1 "register_operand" "%a,b,?a,?b") + (match_operand:SF 2 "register_operand" "a,b,b,a")))] + "TARGET_FP" + "%|%.\\tmpysp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=a,b") + (mult:DF (match_operand:DF 1 "register_operand" "%a,b") + (match_operand:DF 2 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tmpydp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpydp") + (set_attr "units" "m") + (set_attr "cross" "n")]) + +;; Note that mpyspdp and mpysp2dp are available on C67x, despite what the +;; manual says. 
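The mulsi3 expander above builds a 32-bit product from 16x16 multiplies when TARGET_MPY32 is not set. With N = 2^16, (N*AH + AL) * (N*BH + BL) = N^2*AH*BH + N*(AH*BL + BH*AL) + AL*BL, and the N^2 term is 0 modulo 2^32, which is why only the low product and the two cross products (the umulhisi3, _lh and _hl patterns) are formed. A C sketch of the same computation, illustrative only:

    unsigned int mul32_from_16x16 (unsigned int a, unsigned int b)
    {
      unsigned int al = a & 0xffff, ah = a >> 16;
      unsigned int bl = b & 0xffff, bh = b >> 16;

      unsigned int lo    = al * bl;            /* AL*BL                  */
      unsigned int cross = ah * bl + al * bh;  /* AH*BL + AL*BH          */

      return lo + (cross << 16);               /* N * cross, modulo 2^32 */
    }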
+(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "a,b,a,b")) + (match_operand:DF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP_EXT" + "%|%.\\tmpyspdp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpyspdp") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "%a,b,a,b")) + (float_extend:DF (match_operand:SF 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_FP_EXT" + "%|%.\\tmpysp2dp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpysp2dp") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Floating point division +;; ------------------------------------------------------------------------- + +(define_insn "rcpsf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (unspec:SF [(match_operand:SF 1 "register_operand" "a,b,?b,?a")] + UNSPEC_RCP))] + "TARGET_FP" + "%|%.\\trcpsp\\t%$\\t%1, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "rcpdf2" + [(set (match_operand:DF 0 "register_operand" "=a,b") + (unspec:DF [(match_operand:DF 1 "register_operand" "a,b")] + UNSPEC_RCP))] + "TARGET_FP" + "%|%.\\trcpdp\\t%$\\t%1, %0" + [(set_attr "type" "dp2") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "divsf3" + [(set (match_dup 4) + (unspec:SF [(match_operand:SF 2 "register_operand" "")] + UNSPEC_RCP)) + (set (match_dup 5) (mult:SF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:SF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:SF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:SF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:SF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:SF (match_dup 4) (match_dup 6))) + (set (match_operand:SF 0 "register_operand" "") + (mult:SF (match_operand:SF 1 "register_operand") + (match_dup 4)))] + "TARGET_FP && flag_reciprocal_math" +{ + operands[3] = force_reg (SFmode, + CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode)); + operands[4] = gen_reg_rtx (SFmode); + operands[5] = gen_reg_rtx (SFmode); + operands[6] = gen_reg_rtx (SFmode); +}) + +(define_expand "divdf3" + [(set (match_dup 4) + (unspec:DF [(match_operand:DF 2 "register_operand" "")] + UNSPEC_RCP)) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_operand:DF 0 "register_operand" "") + (mult:DF (match_operand:DF 1 "register_operand") + (match_dup 4)))] + "TARGET_FP && flag_reciprocal_math" +{ + operands[3] = force_reg (DFmode, + CONST_DOUBLE_FROM_REAL_VALUE (dconst2, DFmode)); + operands[4] = gen_reg_rtx (DFmode); + operands[5] = gen_reg_rtx (DFmode); + operands[6] = gen_reg_rtx (DFmode); +}) + +;; ------------------------------------------------------------------------- +;; Block moves +;; ------------------------------------------------------------------------- + 
+(define_expand "movmemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (c6x_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; ------------------------------------------------------------------------- +;; Prologue and epilogue. +;; ------------------------------------------------------------------------- + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "type" "blockage")]) + +(define_insn "push_rts" + [(set (mem:SI (reg:SI REG_SP)) (reg:SI REG_B14)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:DI REG_A14)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -16))) (reg:DI REG_B12)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -24))) (reg:DI REG_A12)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -32))) (reg:DI REG_B10)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -40))) (reg:DI REG_A10)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -48))) (reg:DI REG_B2)) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_SP) (const_int -56))) + (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (clobber (reg:SI REG_A3))] + "TARGET_INSNS_64PLUS" + "%|%.\\tcallp\\t%$\\t__c6xabi_push_rts, a3" + [(set_attr "type" "callp") + (set_attr "dest_regfile" "a") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "pop_rts" + [(set (reg:SI REG_B14) (mem:SI (plus:SI (reg:SI REG_SP) (const_int 56)))) + (set (reg:DI REG_A14) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 48)))) + (set (reg:DI REG_B12) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 40)))) + (set (reg:DI REG_A12) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 32)))) + (set (reg:DI REG_B10) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 24)))) + (set (reg:DI REG_A10) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 16)))) + (set (reg:DI REG_B2) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 8)))) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_SP) (const_int 56))) + (clobber (reg:SI REG_A3)) + (return)] + "TARGET_INSNS_64PLUS" + "%|%.\\tretp\\t%$\\t__c6xabi_pop_rts, a3" + [(set_attr "type" "callp") + (set_attr "dest_regfile" "a") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "prologue" + [(const_int 1)] + "" + "c6x_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(const_int 1)] + "" + "c6x_expand_epilogue (false); DONE;") + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + c6x_expand_epilogue (true); + DONE; +}) + +(define_insn "setup_dsbt" + [(set (match_operand:SI 0 "pic_register_operand" "+Z") + (unspec:SI [(match_dup 0) + (match_operand:SI 1 "symbolic_operand" "")] + UNSPEC_SETUP_DSBT))] + "TARGET_DSBT" + "%|%.\\tldw\\t%$\\t*+%0($DSBT_index%1), %0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "dest_regfile" "b") + (set_attr "addr_regfile" "b")]) + + +;; A dummy use/set to prevent prologue and epiloge overlapping. +;; This can be caused by sched-ebb in the presence of multiple +;; exit sequences, and causes the unwinding table generation to explode. 
+(define_insn "epilogue_barrier" + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "")] + UNSPEC_EPILOGUE_BARRIER))] + "" + "" + [(set_attr "type" "blockage")]) + +;; ------------------------------------------------------------------------- +;; Vector insns +;; ------------------------------------------------------------------------- + +(define_code_iterator logical [and ior xor]) +(define_code_attr logical_insn [(and "and") (ior "ior") (xor "xor")]) +(define_code_attr logical_opcode [(and "and") (ior "or") (xor "xor")]) +(define_code_iterator plusminus [plus minus]) +(define_code_attr plusminus_insn [(plus "add") (minus "sub")]) +(define_code_iterator ss_plusminus [ss_plus ss_minus]) +(define_code_attr ss_plusminus_insn [(ss_plus "add") (ss_minus "sub")]) + +;; Vector logical insns + +(define_insn "3" + [(set (match_operand:VEC4M 0 "register_operand" "=a,b,a,b") + (logical:VEC4M (match_operand:VEC4M 1 "register_operand" "a,b,a,b") + (match_operand:VEC4M 2 "register_operand" "a,b,?b,?a")))] + "" + "%|%.\\t\\t%$\\t%1, %2, %0" + [(set_attr "units62" "ls") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +;; Vector add/subtract + +(define_insn "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (plusminus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "" + "%|%.\\t2\\t%$\\t%1, %2, %0" + [(set_attr "units62" "l") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "v4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (plusminus:V4QI (match_operand:V4QI 1 "register_operand" "a,b,a,b") + (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\t4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "ss_addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tsadd2\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "ss_subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tssub2\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "us_addv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (ss_plus:V4QI (match_operand:V4QI 1 "register_operand" "a,b,a,b") + (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tsaddu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; Vector/scalar min/max + +(define_mode_iterator SMINMAX [HI V2HI]) +(define_mode_iterator UMINMAX [QI V4QI]) + +(define_insn "smax3" + [(set (match_operand:SMINMAX 0 "register_operand" "=a,b,a,b") + (smax:SMINMAX (match_operand:SMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:SMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmax2\\t%$\\t%1, %2, %0" + [(set_attr "units64" "l") + (set_attr "units64p" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "smin3" + [(set (match_operand:SMINMAX 0 "register_operand" "=a,b,a,b") + (smin:SMINMAX (match_operand:SMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:SMINMAX 2 
"register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmin2\\t%$\\t%1, %2, %0" + [(set_attr "units64" "l") + (set_attr "units64p" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umax3" + [(set (match_operand:UMINMAX 0 "register_operand" "=a,b,a,b") + (umax:UMINMAX (match_operand:UMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:UMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmaxu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umin3" + [(set (match_operand:UMINMAX 0 "register_operand" "=a,b,a,b") + (umin:UMINMAX (match_operand:UMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:UMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tminu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +;; Vector shifts + +(define_insn "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (any_rshift:V2HI (match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "TARGET_INSNS_64" + "%|%.\\t2\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; See c6x-mult.md.in for avg2/avgu4 + +;; Widening vector multiply and dot product. +;; See c6x-mult.md.in for the define_insn patterns + +(define_expand "sdot_prodv2hi" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_INSNS_64" +{ + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_dotv2hi (t, operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[3], t)); + DONE; +}) + +;; Unary vector operations + +(define_insn "ssabsv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=a, a, b, b") + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "a,?b, b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tabs2\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; Pack insns + +(define_insn "*packv2hi_insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+a,b,a,b,ab") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "nonmemory_operand" "a,b,?b,?a,n"))] + "TARGET_INSNS_64" + "@ + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tmvklh\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y,n")]) + +(define_insn "movstricthi" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+a,b,a,b")) + (match_operand:HI 1 "register_operand" "a,b,?b,?a"))] + "TARGET_INSNS_64" + "%|%.\\tpackhl2\\t%$\\t%0, %1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y")]) + +(include "c6x-mult.md") +(include "sync.md") diff --git a/gcc-4.9/gcc/config/c6x/c6x.opt b/gcc-4.9/gcc/config/c6x/c6x.opt new file mode 100644 index 000000000..1a96f6086 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.opt @@ -0,0 +1,67 @@ +; Option definitions for TI C6X. +; Copyright (C) 2010-2014 Free Software Foundation, Inc. +; Contributed by Bernd Schmidt +; Contributed by CodeSourcery. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify +; it under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. 
+; +; GCC is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/c6x/c6x-opts.h + +SourceInclude +config/c6x/c6x-opts.h + +mbig-endian +Target Report RejectNegative Mask(BIG_ENDIAN) +Use big-endian byte order + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_ENDIAN, LITTLE_ENDIAN) +Use little-endian byte order + +msim +Target RejectNegative +Use simulator runtime + +msdata= +Target RejectNegative Enum(c6x_sdata) Joined Var(c6x_sdata_mode) Init(C6X_SDATA_DEFAULT) +Select method for sdata handling + +Enum +Name(c6x_sdata) Type(enum c6x_sdata) +Valid arguments for the -msdata= option + +EnumValue +Enum(c6x_sdata) String(none) Value(C6X_SDATA_NONE) + +EnumValue +Enum(c6x_sdata) String(default) Value(C6X_SDATA_DEFAULT) + +EnumValue +Enum(c6x_sdata) String(all) Value(C6X_SDATA_ALL) + +mdsbt +Target Mask(DSBT) +Compile for the DSBT shared library ABI + +mlong-calls +Target Report Mask(LONG_CALLS) +Avoid generating pc-relative calls; use indirection + +march= +Target RejectNegative Joined Enum(c6x_isa) Var(c6x_arch_option) +Specify the name of the target architecture diff --git a/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h b/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h new file mode 100644 index 000000000..ce0436ca7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h @@ -0,0 +1,194 @@ +/* Intrinsics for TI C6X. + + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_C6X_INTRINSICS_H +#define _GCC_C6X_INTRINSICS_H + +#if !defined(__TMS320C6X__) +# error "c6x_intrinsics.h is only supported for C6X targets" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Define vector types. 
*/ +typedef uint8_t __uv4qi __attribute__((vector_size (4))); +typedef int16_t __v2hi __attribute__((vector_size (4))); +typedef int32_t __v2si __attribute__((vector_size (8))); + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_abs (int src) +{ + return __builtin_c6x_abs (src); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_abs2 (int src) +{ + return (int)__builtin_c6x_abs2 ((__v2hi)src); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sadd (int src1, int src2) +{ + return __builtin_c6x_sadd (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_ssub (int src1, int src2) +{ + return __builtin_c6x_ssub (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_add2 (int src1, int src2) +{ + return (int)__builtin_c6x_add2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sub2 (int src1, int src2) +{ + return (int)__builtin_c6x_sub2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_add4 (int src1, int src2) +{ + return (int)__builtin_c6x_add4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sub4 (int src1, int src2) +{ + return (int)__builtin_c6x_sub4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sadd2 (int src1, int src2) +{ + return (int)__builtin_c6x_sadd2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_ssub2 (int src1, int src2) +{ + return (int)__builtin_c6x_ssub2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_saddu4 (int src1, int src2) +{ + return (int)__builtin_c6x_saddu4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpy (int src1, int src2) +{ + return __builtin_c6x_smpy (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpylh (int src1, int src2) +{ + return __builtin_c6x_smpylh (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpyhl (int src1, int src2) +{ + return __builtin_c6x_smpyhl (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpyh (int src1, int src2) +{ + return __builtin_c6x_smpyh (src1, src2); +} + +__extension__ static __inline long long __attribute__ ((__always_inline__)) +_smpy2ll (int src1, int src2) +{ + return (long long)__builtin_c6x_smpy2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline long long __attribute__ ((__always_inline__)) +_mpy2ll (int src1, int src2) +{ + return (long long)__builtin_c6x_mpy2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_extr (int src1, int src2) +{ + return __builtin_c6x_extr (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_extru (int src1, int src2) +{ + return __builtin_c6x_extru (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_clrr (int src1, int src2) +{ + return __builtin_c6x_clrr (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_avg2 (int src1, int src2) +{ + return (int)__builtin_c6x_avg2 ((__v2hi)src1, (__v2hi)src2); +} + 
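Each wrapper above simply casts a packed int to the relevant vector type and forwards it to the corresponding target builtin, so user code keeps working with ordinary 32-bit values. The sketch below is a hypothetical usage example rather than anything from the GCC sources; pack_2x16 is an assumed helper, demo is an invented function, and the halfword layout shown is the little-endian one.

#include <c6x_intrinsics.h>
#include <stdint.h>

/* Pack two halfwords into one 32-bit register image (little-endian:
   'lo' in bits 0-15, 'hi' in bits 16-31).  */
static inline int
pack_2x16 (int16_t lo, int16_t hi)
{
  union { int16_t h[2]; int w; } u = { { lo, hi } };
  return u.w;
}

int
demo (void)
{
  int x = pack_2x16 (1000, -2000);
  int y = pack_2x16 (32000, 30000);
  int wrap = _add2 (x, y);   /* two independent 16-bit adds, wrapping:
                                low 33000 -> -32536, high 28000        */
  int sat  = _sadd2 (x, y);  /* same adds with signed saturation:
                                low 32767, high 28000                  */
  (void) wrap;
  return _abs2 (sat);        /* per-halfword absolute value            */
}

The point of the packed-int convention is that no vector extension syntax leaks into user code; the casts to __v2hi and __uv4qi stay confined to this header.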
+__extension__ static __inline int __attribute__ ((__always_inline__)) +_avgu4 (int src1, int src2) +{ + return (int)__builtin_c6x_avgu4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sshl (int src1, int src2) +{ + return __builtin_c6x_sshl (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_subc (int src1, int src2) +{ + return __builtin_c6x_subc (src1, src2); +} + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc-4.9/gcc/config/c6x/constraints.md b/gcc-4.9/gcc/config/c6x/constraints.md new file mode 100644 index 000000000..e2721d9a7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/constraints.md @@ -0,0 +1,174 @@ +;; Constraint definitions for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Andrew Jenner +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "a" "A_REGS" + "Register file A (A0--A31).") + +(define_register_constraint "b" "B_REGS" + "Register file B (B0--B31).") + +(define_register_constraint "A" "PREDICATE_A_REGS" + "Predicate registers in register file A (A0--A2 on C64X and higher, + A1 and A2 otherwise).") + +(define_register_constraint "B" "PREDICATE_B_REGS" + "Predicate registers in register file B (B0--B2).") + +(define_register_constraint "C" "CALL_USED_B_REGS" + "A call-used register in register file B (B0--B9, B16--B31).") + +(define_register_constraint "Da" "NONPREDICATE_A_REGS" + "Register file A, excluding predicate registers (A3--A31, plus A0 if +not C64X or higher).") + +(define_register_constraint "Db" "NONPREDICATE_B_REGS" + "Register file B, excluding predicate registers (B3--B31).") + +(define_register_constraint "Z" "PICREG" + "Register B14 (aka DP).") + +(define_register_constraint "z" "SPREG" + "Register B15 (aka SP).") + +(define_constraint "Iu4" + "Integer constant in the range 0 @dots{} 15, aka ucst4." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 15"))) + +(define_constraint "Iu5" + "Integer constant in the range 0 @dots{} 31, aka ucst5." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "In5" + "Integer constant in the range @minus{}31 @dots{} 0, negation of ucst5." + (and (match_code "const_int") + (match_test "ival >= -31 && ival <= 0"))) + +(define_constraint "Is5" + "Integer constant in the range @minus{}16 @dots{} 15, aka scst5." + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 15"))) + +(define_constraint "I5x" + "Integer constant that can be the operand of an ADDA or a SUBA insn." 
+ (and (match_code "const_int") + (match_test "(ival >= -31 && ival <= 31) + || ((ival & 1) == 0 && ival >= -62 && ival <= 62) + || ((ival & 3) == 0 && ival >= -124 && ival <= 124) + || ((TARGET_INSNS_64 || TARGET_INSNS_67) + && (ival & 7) == 0 && ival > 0 && ival <= 248)"))) + +(define_constraint "Iux" + "Integer constant that can be the operand of a long ADDA or a SUBA insn, + i.e. one involving B14 or B15 as source operand." + (and (match_code "const_int") + (and (match_test "TARGET_INSNS_64PLUS") + (match_test "ival >= 0 + && (ival < 32768 + || ((ival & 1) == 0 && ival < 65536) + || ((ival & 3) == 0 && ival < 131072))")))) + +(define_constraint "IuB" + "Integer constant in the range 0 @dots{} 65535, aka ucst16." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "IsB" + "Integer constant in the range @minus{}32768 @dots{} 32767." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "IsC" + "Integer constant in the range @math{-2^{20}} @dots{} @math{2^{20} - 1}." + (and (match_code "const_int") + (match_test "ival >= -0x100000 && ival <= 0xfffff"))) + +(define_constraint "JA" + "@internal + Integer constant in the range 0 @dots{} 31, corresponding to an A register + number." + (and (match_code "const_int") + (match_test "ival >= 0 && ival < 32"))) + +(define_constraint "JB" + "@internal + Integer constant in the range 32 @dots{} 63, corresponding to a B register + number." + (and (match_code "const_int") + (match_test "ival >= 32 && ival < 64"))) + +(define_constraint "Jc" + "Integer constant that is a valid mask for the clr instruction" + (and (match_code "const_int") + (match_test "c6x_valid_mask_p (ival)"))) + +(define_constraint "Js" + "Integer constant that is a valid mask for the set instruction" + (and (match_code "const_int") + (match_test "c6x_valid_mask_p (~ival)"))) + +(define_memory_constraint "Q" + "Memory location with A base register." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, A_REGS, false)"))) + +(define_memory_constraint "R" + "Memory location with B base register." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, B_REGS, false)"))) + +(define_memory_constraint "T" + "@internal + Memory location with B base register, but not using a long offset." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, B_REGS, true)"))) + +(define_constraint "S0" + "@internal + On C64x+ targets, a GP-relative small data reference" + (and (match_test "TARGET_INSNS_64PLUS") + (match_operand 0 "sdata_symbolic_operand"))) + +(define_constraint "S1" + "@internal + Any kind of @code{SYMBOL_REF}, for use in a call address." + (and (match_code "symbol_ref") + (match_operand 0 "c6x_call_operand"))) + +(define_constraint "S2" + "@internal + Any SYMBOL_REF or LABEL_REF." + (ior (match_code "symbol_ref") (match_code "label_ref"))) + +(define_constraint "Si" + "@internal + Any immediate value, unless it matches the S0 constraint." + (and (match_operand 0 "immediate_operand") + (match_test "!satisfies_constraint_S0 (op)"))) + +(define_memory_constraint "W" + "@internal + A memory operand with an address that can't be used in an unaligned access." 
+ (and (match_code "mem") + (match_test "!c6x_legitimate_address_p_1 (GET_MODE (op), XEXP (op, 0), + reload_completed, true)"))) diff --git a/gcc-4.9/gcc/config/c6x/elf-common.h b/gcc-4.9/gcc/config/c6x/elf-common.h new file mode 100644 index 000000000..8eef1b82e --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/elf-common.h @@ -0,0 +1,37 @@ +/* ELF definitions for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Controlling the Compilation Driver. */ +#define ENDIAN_LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} " + +#undef ASM_SPEC +#define ASM_SPEC "%{march=*:-march=%*} %{mbig-endian:-mbig-endian} \ + %{mdsbt:-mdsbt %{!fPIC:-mpid=near} %{fPIC:-mpid=far -mpic} %{fpic:-mpic}} \ + %{!mdsbt:%{fpic:-mpic -mpid=near} %{fPIC:-mpic -mpid=far}}" + +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.section\t\".fardata\",\"aw\"" +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t\".const\",\"a\",@progbits" +#define BSS_SECTION_ASM_OP "\t.section\t\".far\",\"aw\",@nobits" +#define SDATA_SECTION_ASM_OP "\t.section\t\".neardata\",\"aw\"" +#define SBSS_SECTION_ASM_OP "\t.section\t\".bss\",\"aw\",@nobits" +#define TARGET_LIBGCC_SDATA_SECTION ".neardata" diff --git a/gcc-4.9/gcc/config/c6x/elf.h b/gcc-4.9/gcc/config/c6x/elf.h new file mode 100644 index 000000000..a4189f6ae --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/elf.h @@ -0,0 +1,35 @@ +/* ELF definitions for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* crt0.o should come from the linker script, but for compatibility, + we mention it here for -msim. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{msim:crt0%O%s} crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "%{msim:--start-group -lc -lsim --end-group;" \ + ":-lc}" + +#undef LINK_SPEC +#define LINK_SPEC ENDIAN_LINK_SPEC diff --git a/gcc-4.9/gcc/config/c6x/genmult.sh b/gcc-4.9/gcc/config/c6x/genmult.sh new file mode 100644 index 000000000..dd8a086f4 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/genmult.sh @@ -0,0 +1,33 @@ +#! 
/bin/sh +# Generate c6x-mult.md from c6x-mult.md.in +# The input file is passed as an argument. + +# Copyright (C) 2011-2014 Free Software Foundation, Inc. + +#This file is part of GCC. + +#GCC is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3, or (at your option) +#any later version. + +#GCC is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. + +#You should have received a copy of the GNU General Public License +#along with GCC; see the file COPYING3. If not see +#. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically from c6x-mult.md.in by genmult.sh" + +sed -e "s,_VARIANT_,,g" -e "s,_SET_,set,g" -e "s,_.BRK_,,g" \ + -e "s,_A_,a,g" -e "s,_B_,b,g" -e "s,_DESTOPERAND_,register_operand,g" \ + -e "s,_MOD._,,g" -e "s,:_M,:,g" < $1 + +sed -e "s,_VARIANT_,_real,g" -e "s,_SET_,unspec,g" -e "s,_OBRK_,[,g" \ + -e "s,_CBRK_,] UNSPEC_REAL_MULT,g" -e "s,_A_,JA,g" -e "s,_B_,JB,g" \ + -e "s,_DESTOPERAND_,const_int_operand,g" -e "s,_MODk_,k,g" \ + -e "s,_MODK_,K,g" -e 's,:_MV..[IQ],:SI,g' -e "s,:_MSQ,:SI,g" < $1 diff --git a/gcc-4.9/gcc/config/c6x/genopt.sh b/gcc-4.9/gcc/config/c6x/genopt.sh new file mode 100644 index 000000000..406823a7b --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/genopt.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# Generate c6x-tables.opt from the lists in *.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(c6x_isa) Type(int) +Known C6X ISAs (for use with the -march= option): + +EOF + +awk -F'[(, ]+' 'BEGIN { + value = 0 +} +/^C6X_ISA/ { + name = $2 + gsub("\"", "", name) + print "EnumValue" + print "Enum(c6x_isa) String(" name ") Value(" value ")" + print "" + value++ +}' $1/c6x-isas.def diff --git a/gcc-4.9/gcc/config/c6x/gensched.sh b/gcc-4.9/gcc/config/c6x/gensched.sh new file mode 100644 index 000000000..4d389cc03 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/gensched.sh @@ -0,0 +1,44 @@ +#! /bin/sh +# Generate c6x-sched.md from c6x-sched.md.in +# The input file is passed as an argument. + +# Copyright (C) 2010-2014 Free Software Foundation, Inc. + +#This file is part of GCC. + +#GCC is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3, or (at your option) +#any later version. + +#GCC is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. 
+ +#You should have received a copy of the GNU General Public License +#along with GCC; see the file COPYING3. If not see +#. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically from c6x-sched.md.in by gensched.sh" + +for cross in n y; do + for side in 1 2; do + tside=$side + case $side in + 1) rf="a"; otherside=2 ;; + 2) rf="b"; otherside=1 ;; + esac + case $cross in + y) cunit="+x$side"; tside=$otherside;; + n) cunit="";; + esac + echo + echo ";; Definitions for side $side, cross $cross" + echo + sed -e "s,_CROSS_,$cross,g" -e "s,_CUNIT_,$cunit,g" \ + -e "s,_N_,$side,g" -e "s,_RF_,$rf,g" -e "s,_NX_,$tside,g" \ + < $1 + done +done diff --git a/gcc-4.9/gcc/config/c6x/predicates.md b/gcc-4.9/gcc/config/c6x/predicates.md new file mode 100644 index 000000000..464d27689 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/predicates.md @@ -0,0 +1,226 @@ +/* Predicates for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +(define_predicate "const_vector_operand" + (match_code "const_vector")) + +(define_predicate "scst5_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is5 (op)"))) + +(define_predicate "reg_or_ucst4_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu4 (op)")))) + +(define_predicate "reg_or_scst5_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "scst5_operand"))) + +(define_predicate "reg_or_ucst5_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu5 (op)")))) + +(define_predicate "addsi_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_IsB (op)")))) + +(define_predicate "andsi_operand" + (ior (match_operand 0 "reg_or_scst5_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Jc (op)")))) + +(define_predicate "iorsi_operand" + (ior (match_operand 0 "reg_or_scst5_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Js (op)")))) + +(define_predicate "insv_operand" + (and (match_operand 0 "const_int_operand") + (match_test "INTVAL (op) == 0 || INTVAL (op) == -1"))) + +(define_predicate "c6x_jump_operand" + (match_code "label_ref,symbol_ref,reg")) + +(define_predicate "c6x_call_operand" + (ior (match_code "symbol_ref,reg") + (and (match_code "subreg") + (match_test "GET_CODE (XEXP (op, 0)) == REG"))) +{ + /* The linker transforms jumps to undefined weak symbols in a way that + is incompatible with our code generation. 
*/ + return (GET_CODE (op) != SYMBOL_REF + || (!SYMBOL_REF_WEAK (op) + && !c6x_long_call_p (op))); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT")))) + +(define_predicate "const_int_or_symbolic_operand" + (ior (match_operand 0 "symbolic_operand") + (match_operand 0 "const_int_operand"))) + +;; Return nonzero iff OP is one of the integer constants 2, 4 or 8. +(define_predicate "adda_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 + || ((TARGET_INSNS_64 || TARGET_INSNS_67) + && INTVAL (op) == 8)"))) + +;; Return nonzero iff OP is one of the integer constants 2 or 4. +(define_predicate "suba_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4"))) + +;; True if this operator is valid for predication. +(define_predicate "predicate_operator" + (match_code "eq,ne")) + +(define_predicate "c6x_comparison_operator" + (match_code "eq,ltu,gtu,lt,gt")) + +(define_predicate "non_c6x_comparison_operator" + (match_code "ne,leu,geu,le,ge")) + +;; FP Comparisons handled by c6x_expand_compare. +(define_predicate "c6x_fp_comparison_operator" + (ior (match_code "eq,lt,gt,le,ge") + (and (match_test "TARGET_FP") + (match_code "ltgt,uneq,unlt,ungt,unle,unge,ordered,unordered")))) + +(define_predicate "c6x_any_comparison_operand" + (match_code "eq,lt,gt,le,ge,ltu,gtu") +{ + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + if (ltugtu_operator (op, SImode) + && register_operand (op0, SImode) + && ((TARGET_INSNS_64 && reg_or_ucst5_operand (op1, SImode)) + || (!TARGET_INSNS_64 && reg_or_ucst4_operand (op1, SImode)))) + return true; + if (eqltgt_operator (op, SImode) + && register_operand (op0, SImode) + && reg_or_scst5_operand (op1, SImode)) + return true; + if (!TARGET_FP) + return false; + if (!eqltgt_operator (op, SFmode) && !eqltgt_operator (op, DFmode)) + return false; + if (register_operand (op0, GET_MODE (op)) + && register_operand (op1, GET_MODE (op))) + return true; + return false; +}) + +(define_predicate "ltugtu_operator" + (match_code "ltu,gtu")) + +(define_predicate "eqltgt_operator" + (match_code "eq,lt,gt")) + +(define_predicate "eqne_operator" + (match_code "eq,ne")) + +(define_predicate "predicate_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == PREDICATE_A_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == PREDICATE_B_REGS")))) + +;; Allow const_ints for things like the real_mult patterns. +(define_predicate "a_register" + (ior (and (match_code "reg") + (match_test "A_REGNO_P (REGNO (op))")) + (and (match_code "const_int") + (match_test "A_REGNO_P (INTVAL (op))")))) + +(define_predicate "b_register" + (ior (and (match_code "reg") + (match_test "B_REGNO_P (REGNO (op))")) + (and (match_code "const_int") + (match_test "B_REGNO_P (INTVAL (op))")))) + +(define_predicate "pic_register_operand" + (and (match_code "reg") + (match_test "op == pic_offset_table_rtx"))) + +;; True if OP refers to a symbol in the sdata section. 
+(define_predicate "sdata_symbolic_operand" + (match_code "symbol_ref,const") +{ + HOST_WIDE_INT offset = 0, size = 0; + tree t; + + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + offset = INTVAL (XEXP (op, 1)); + op = XEXP (op, 0); + /* FALLTHRU */ + + case SYMBOL_REF: + /* For shared libraries, only allow symbols we know are local. + For executables, the linker knows to create copy relocs if + necessary so we can use DP-relative addressing for all small + objects. */ + if ((c6x_initial_flag_pic && !SYMBOL_REF_LOCAL_P (op)) + || !SYMBOL_REF_SMALL_P (op)) + return false; + + /* Note that in addition to DECLs, we can get various forms + of constants here. */ + t = SYMBOL_REF_DECL (op); + if (DECL_P (t)) + t = DECL_SIZE_UNIT (t); + else + t = TYPE_SIZE_UNIT (TREE_TYPE (t)); + if (t && tree_fits_shwi_p (t)) + { + size = tree_to_shwi (t); + if (size < 0) + size = 0; + } + + /* Don't allow addressing outside the object. */ + return (offset >= 0 && offset <= size); + + default: + gcc_unreachable (); + } +}) diff --git a/gcc-4.9/gcc/config/c6x/sync.md b/gcc-4.9/gcc/config/c6x/sync.md new file mode 100644 index 000000000..fff6c4394 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/sync.md @@ -0,0 +1,270 @@ +;; GCC machine description for C6X synchronization instructions. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; C64X+ has atomic instructions, but they are not atomic on all +;; devices and have other problems. We use normal loads and stores, +;; and place them in overlapping branch shadows to ensure interrupts +;; are disabled during the sequence, which guarantees atomicity on all +;; single-core systems. 
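The comment above is the key design note for this file: rather than rely on the C64X+ atomic instructions, every synchronization primitive is open-coded as a plain load/operate/store placed in the delay slots of two overlapping branches, so no interrupt can be taken mid-sequence on a single-core part. A hypothetical C-level view of what this buys follows; counter and take_ticket are invented names, and the expansion path through the "sync_old_addsi" pattern is my reading of the expanders defined below.

#include <stdint.h>

int32_t counter;

/* Returns the previous value of the counter.  With the patterns in this
   file, the builtin should expand through "sync_old_addsi" into one of
   the branch-shadow sequences further down, so the load, add and store
   cannot be separated by an interrupt on a single-core device.  */
int32_t
take_ticket (void)
{
  return __sync_fetch_and_add (&counter, 1);
}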
+ +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "reg_or_scst5_operand") (minus "register_operand") + (ior "reg_or_scst5_operand") (xor "reg_or_scst5_operand") + (and "reg_or_scst5_operand")]) +(define_code_attr fetchop_constr + [(plus "bIs5") (minus "b") (ior "bIs5") (xor "bIs5") (and "bIs5")]) +(define_code_attr fetchop_opcode + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_inops02 + [(plus "%2, %0") (minus "%0, %2") (ior "%2, %0") (xor "%2, %0") + (and "%2, %0")]) +(define_code_attr fetchop_inops21 + [(plus "%1, %2") (minus "%2, %1") (ior "%1, %2") (xor "%1, %2") + (and "%1, %2")]) + +(define_expand "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec_volatile:SI + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + UNSPECV_CAS)) + (clobber (match_scratch:SI 4 ""))])] + "" +{ +}) + +(define_expand "sync_si" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ +}) + +(define_expand "sync_old_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_new_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (FETCHOP:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "" ""))) + (set (match_dup 1) + (unspec:SI [(FETCHOP:SI (match_dup 1) (match_dup 2))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_nandsi" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "") + (unspec:SI + [(not:SI (and:SI (match_dup 0) + (match_operand:SI 1 "reg_or_scst5_operand" "")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ +}) + +(define_expand "sync_old_nandsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) + (match_operand:SI 2 "reg_or_scst5_operand" "")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_new_nandsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (and:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "reg_or_scst5_operand" "")))) + (set (match_dup 1) + (unspec:SI [(not:SI (and:SI (match_dup 1) (match_dup 2)))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_insn "*sync_compare_and_swapsi" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:SI + [(match_operand:SI 2 "register_operand" "B") + (match_operand:SI 3 "register_operand" "b")] + UNSPECV_CAS)) + (clobber (match_scratch:SI 4 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ + || ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + cmpeq .l2 %0, %2, %2 ; 5\n\\ +1: [%2] stw .d%U1t%U3 %3, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_si_insn" + [(set (match_operand:SI 0 
"memory_operand" "+m") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U0t%U2 %0, %2\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %2 ; 5\n\\ +1: stw .d%U0t%U2 %2, %0 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_old_si_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %3 ; 5\n\\ +1: stw .d%U1t%U3 %3, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_new_si_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (FETCHOP:SI (match_operand:SI 1 "memory_operand" "+m") + (match_operand:SI 2 "" ""))) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_dup 2))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %0 ; 5\n\\ +1: stw .d%U1t%U0 %0, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_nandsi_insn" + [(set (match_operand:SI 0 "memory_operand" "+m") + (unspec:SI + [(not:SI (and:SI (match_dup 0) + (match_operand:SI 1 "reg_or_scst5_operand" "bIs5")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U0t%U2 %0, %2\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %1, %2, %2 ; 5\n\\ +1: not .l2 %2, %2 ; 6\n\\ + stw .d%U0t%U2 %2, %0 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_old_nandsi_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) + (match_operand:SI 2 "reg_or_scst5_operand" "bIs5")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %2, %0, %3 ; 5\n\\ +1: not .l2 %3, %3 ; 6\n\\ + stw .d%U1t%U3 %3, %1 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_new_nandsi_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (not:SI (and:SI (match_operand:SI 1 "memory_operand" "+m") + (match_operand:SI 2 "reg_or_scst5_operand" "bIs5")))) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) (match_dup 2)))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %2, %0, %0 ; 5\n\\ +1: not .l2 %0, %0 ; 6\n\\ + stw .d%U1t%U0 %0, %1 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) diff --git a/gcc-4.9/gcc/config/c6x/t-c6x b/gcc-4.9/gcc/config/c6x/t-c6x new file mode 100644 index 000000000..4cde36ce8 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x @@ -0,0 +1,42 @@ +# Target Makefile Fragment for TI C6X. +# Copyright (C) 2010-2014 Free Software Foundation, Inc. +# Contributed by CodeSourcery. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. 
+# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MD_INCLUDES= $(srcdir)/config/c6x/constraints.md \ + $(srcdir)/config/c6x/predicates.md \ + $(srcdir)/config/c6x/c6x-mult.md \ + $(srcdir)/config/c6x/c6x-sched.md + +s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) + +$(srcdir)/config/c6x/c6x-sched.md: $(srcdir)/config/c6x/gensched.sh \ + $(srcdir)/config/c6x/c6x-sched.md.in + $(SHELL) $(srcdir)/config/c6x/gensched.sh \ + $(srcdir)/config/c6x/c6x-sched.md.in > $@ + +$(srcdir)/config/c6x/c6x-mult.md: $(srcdir)/config/c6x/genmult.sh \ + $(srcdir)/config/c6x/c6x-mult.md.in + $(SHELL) $(srcdir)/config/c6x/genmult.sh \ + $(srcdir)/config/c6x/c6x-mult.md.in > $@ + +$(srcdir)/config/c6x/c6x-tables.opt: $(srcdir)/config/c6x/genopt.sh \ + $(srcdir)/config/c6x/c6x-isas.def + $(SHELL) $(srcdir)/config/c6x/genopt.sh $(srcdir)/config/c6x > \ + $(srcdir)/config/c6x/c6x-tables.opt diff --git a/gcc-4.9/gcc/config/c6x/t-c6x-elf b/gcc-4.9/gcc/config/c6x/t-c6x-elf new file mode 100644 index 000000000..8d7276be4 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x-elf @@ -0,0 +1,30 @@ +# Target Makefile Fragment for TI C6X using ELF. +# Copyright (C) 2010-2014 Free Software Foundation, Inc. +# Contributed by CodeSourcery. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h + +# Use this variant for fully testing all CPU types +#MULTILIB_OPTIONS = mbig-endian march=c674x/march=c64x/march=c67x/march=c67x+/march=c62x +#MULTILIB_DIRNAMES = be c674x c64x c67x c67x+ c62x + +MULTILIB_OPTIONS = mbig-endian march=c674x +MULTILIB_DIRNAMES = be c674x +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = diff --git a/gcc-4.9/gcc/config/c6x/t-c6x-uclinux b/gcc-4.9/gcc/config/c6x/t-c6x-uclinux new file mode 100644 index 000000000..e4b93908f --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x-uclinux @@ -0,0 +1,3 @@ +MULTILIB_OSDIRNAMES = march.c674x=!c674x +MULTILIB_OSDIRNAMES += mbig-endian=!be +MULTILIB_OSDIRNAMES += mbig-endian/march.c674x=!be/c674x diff --git a/gcc-4.9/gcc/config/c6x/uclinux-elf.h b/gcc-4.9/gcc/config/c6x/uclinux-elf.h new file mode 100644 index 000000000..3f3964ba7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/uclinux-elf.h @@ -0,0 +1,63 @@ +/* Definitions for TI C6X running ucLinux using ELF + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__uClinux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared:crt1%O%s} crti%O%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" + +#undef LINK_SPEC +#define LINK_SPEC ENDIAN_LINK_SPEC \ + "%{shared} %{fpie|fPIE:-pie} \ + %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker " UCLIBC_DYNAMIC_LINKER "}} \ + %{static}}" + +#undef DRIVER_SELF_SPECS +#define DRIVER_SELF_SPECS "%{!mno-dsbt:-mdsbt}" + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("A4") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("B4") = (unsigned long) (END); \ + register unsigned long _scno __asm ("B0") = 244; \ + __asm __volatile ("swe ; sys_cache_sync" \ + : "=a" (_beg) \ + : "0" (_beg), "b" (_end), "b" (_scno)); \ +} diff --git a/gcc-4.9/gcc/config/cr16/constraints.md b/gcc-4.9/gcc/config/cr16/constraints.md new file mode 100644 index 000000000..9d9789bce --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/constraints.md @@ -0,0 +1,81 @@ +;; Predicates of machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Constraints +;; Register constraints +(define_register_constraint "b" "NOSP_REGS" + "@no sp registers") + +(define_register_constraint "c" "SHORT_REGS" + "@short registers") + +(define_register_constraint "d" "LONG_REGS" + "@long registers") + +;; Integer constraints. +(define_constraint "I" + "A signed 4-bit immediate." + (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 4)"))) + +(define_constraint "J" + "A signed 5-bit immediate." + (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 5)"))) + +(define_constraint "K" + "A signed 6-bit immediate." 
+ (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 6)"))) + +(define_constraint "L" + "A unsigned 4-bit immediate." + (and (match_code "const_int") + (match_test "UNSIGNED_INT_FITS_N_BITS (ival, 4)"))) + +(define_constraint "M" + "A unsigned and customized 4-bit immediate." + (and (match_code "const_int") + (match_test "(IN_RANGE_P (ival, 0, 15) && ((ival != 9) && (ival != 11)))"))) + +(define_constraint "N" + "A signed 16-bit immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, -32768, 32767)"))) + +(define_constraint "O" + "A unsigned 20-bit immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 1048575)"))) + +(define_constraint "Q" + "A shift QI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 7)"))) + +(define_constraint "R" + "A shift HI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 15)"))) + +(define_constraint "S" + "A shift SI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 31)"))) diff --git a/gcc-4.9/gcc/config/cr16/cr16-protos.h b/gcc-4.9/gcc/config/cr16/cr16-protos.h new file mode 100644 index 000000000..80ea43260 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16-protos.h @@ -0,0 +1,99 @@ +/* Prototypes for exported functions defined in cr16.c + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_CR16_PROTOS_H +#define GCC_CR16_PROTOS_H + +/* Register usage. */ +extern enum reg_class cr16_regno_reg_class (int); +extern int cr16_hard_regno_mode_ok (int regno, enum machine_mode); + +/* Passing function arguments. */ +extern int cr16_function_arg_regno_p (int); + +#ifdef TREE_CODE +#ifdef RTX_CODE + +extern void cr16_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); + +#endif /* RTX_CODE. */ +#endif /* TREE_CODE. */ + +/* Enumeration giving the various data models we support. */ +enum data_model_type +{ + DM_DEFAULT, /* Default data model (in CR16C/C+ - up to 16M). */ + DM_NEAR, /* Near data model (in CR16C/C+ - up to 1M). */ + DM_FAR, /* Far data model (in CR16C+ - up to 4G) + (in CR16C - up to 16M). */ + ILLEGAL_DM /* Illegal data model. */ +}; + +#ifdef RTX_CODE + +/* Addressing Modes. */ +struct cr16_address +{ + rtx base; /* Base register: Any register or register pair. */ + rtx index; /* Index register: If one is present. */ + rtx disp; /* Displacement or Absolute address. */ + enum data_model_type data; /* data ref type. */ + int code; /* Whether the address is code address. + 0 - data, 1 - code label, 2 - function label. 
*/ +}; + +enum cr16_addrtype +{ + CR16_INVALID, + CR16_REG_REL, + CR16_REGP_REL, + CR16_INDEX_REGP_REL, + CR16_ABSOLUTE +}; + +extern void notice_update_cc (rtx); +extern int cr16_operand_bit_pos (int val, int bitval); +extern void cr16_decompose_const (rtx x, int *code, + enum data_model_type *data, + bool treat_as_const); +extern enum cr16_addrtype cr16_decompose_address (rtx addr, + struct cr16_address *out, + bool debug_print, + bool treat_as_const); +extern int cr16_const_double_ok (rtx op); +extern int legitimate_pic_operand_p (rtx); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); + + +/* Prologue/Epilogue functions. */ +extern int cr16_initial_elimination_offset (int, int); +extern char *cr16_prepare_push_pop_string (int); +extern void cr16_expand_prologue (void); +extern void cr16_expand_epilogue (void); +extern const char *cr16_emit_add_sub_di (rtx *, enum rtx_code); +extern const char *cr16_emit_logical_di (rtx *, enum rtx_code); + +#endif /* RTX_CODE. */ + +/* Handling the "interrupt" attribute. */ +extern int cr16_interrupt_function_p (void); +extern bool cr16_is_data_model (enum data_model_type); + +#endif /* Not GCC_CR16_PROTOS_H. */ diff --git a/gcc-4.9/gcc/config/cr16/cr16.c b/gcc-4.9/gcc/config/cr16/cr16.c new file mode 100644 index 000000000..f5a444bec --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.c @@ -0,0 +1,2194 @@ +/* Output routines for CR16 processor. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-codes.h" +#include "insn-attr.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "basic-block.h" +#include "target.h" +#include "target-def.h" +#include "df.h" + +/* Definitions. */ + +/* Maximum number of register used for passing parameters. */ +#define MAX_REG_FOR_PASSING_ARGS 6 + +/* Minimum number register used for passing parameters. */ +#define MIN_REG_FOR_PASSING_ARGS 2 + +/* The maximum count of words supported in the assembly of the architecture in + a push/pop instruction. */ +#define MAX_COUNT 8 + +/* Predicate is true if the current function is a 'noreturn' function, + i.e. it is qualified as volatile. */ +#define FUNC_IS_NORETURN_P(decl) (TREE_THIS_VOLATILE (decl)) + +/* Predicate that holds when we need to save registers even for 'noreturn' + functions, to accommodate for unwinding. */ +#define MUST_SAVE_REGS_P() \ + (flag_unwind_tables || (flag_exceptions && !UI_SJLJ)) + +/* Nonzero if the rtx X is a signed const int of n bits. 
*/ +#define RTX_SIGNED_INT_FITS_N_BITS(X, n) \ + ((GET_CODE (X) == CONST_INT \ + && SIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/* Nonzero if the rtx X is an unsigned const int of n bits. */ +#define RTX_UNSIGNED_INT_FITS_N_BITS(X, n) \ + ((GET_CODE (X) == CONST_INT \ + && UNSIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/* Structure for stack computations. */ + +/* variable definitions in the struture + args_size Number of bytes saved on the stack for local + variables + + reg_size Number of bytes saved on the stack for + non-scratch registers + + total_size The sum of 2 sizes: locals vars and padding byte + for saving the registers. Used in expand_prologue() + and expand_epilogue() + + last_reg_to_save Will hold the number of the last register the + prologue saves, -1 if no register is saved + + save_regs[16] Each object in the array is a register number. + Mark 1 for registers that need to be saved + + num_regs Number of registers saved + + initialized Non-zero if frame size already calculated, not + used yet + + function_makes_calls Does the function make calls ? not used yet. */ + +struct cr16_frame_info +{ + unsigned long var_size; + unsigned long args_size; + unsigned int reg_size; + unsigned long total_size; + long last_reg_to_save; + long save_regs[FIRST_PSEUDO_REGISTER]; + int num_regs; + int initialized; + int function_makes_calls; +}; + +/* Current frame information calculated by cr16_compute_frame_size. */ +static struct cr16_frame_info current_frame_info; + +/* Static Variables. */ + +/* Data model that was supplied by user via command line option + This will be overridden in case of invalid combination + of core and data model options are supplied. */ +static enum data_model_type data_model = DM_DEFAULT; + +/* TARGETM Function Prototypes and forward declarations */ +static void cr16_print_operand (FILE *, rtx, int); +static void cr16_print_operand_address (FILE *, rtx); + +/* Stack layout and calling conventions. */ +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX cr16_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY cr16_return_in_memory + +/* Target-specific uses of '__attribute__'. */ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE cr16_attribute_table +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +/* EH related. */ +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE cr16_unwind_word_mode + +/* Override Options. */ +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE cr16_override_options + +/* Conditional register usuage. */ +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE cr16_conditional_register_usage + +/* Controlling register spills. */ +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P cr16_class_likely_spilled_p + +/* Passing function arguments. */ +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG cr16_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE cr16_function_arg_advance +#undef TARGET_RETURN_POPS_ARGS +#define TARGET_RETURN_POPS_ARGS cr16_return_pops_args + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED cr16_frame_pointer_required +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE cr16_can_eliminate +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS cr16_legitimize_address +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P cr16_legitimate_constant_p +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P cr16_legitimate_address_p + +/* Returning function value. */ +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE cr16_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE cr16_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P cr16_function_value_regno_p + +/* printing the values. */ +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND cr16_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS cr16_print_operand_address + +/* Relative costs of operations. */ +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST cr16_address_cost +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST cr16_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST cr16_memory_move_cost + +/* Table of machine attributes. */ +static const struct attribute_spec cr16_attribute_table[] = { + /* ISRs have special prologue and epilogue requirements. */ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity }. */ + {"interrupt", 0, 0, false, true, true, NULL, false}, + {NULL, 0, 0, false, false, false, NULL, false} +}; + +/* TARGET_ASM_UNALIGNED_xx_OP generates .?byte directive + .?byte directive along with @c is not understood by assembler. + Therefore, make all TARGET_ASM_UNALIGNED_xx_OP same + as TARGET_ASM_ALIGNED_xx_OP. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +/* Target hook implementations. */ + +/* Implements hook TARGET_RETURN_IN_MEMORY. */ +static bool +cr16_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + const HOST_WIDE_INT size = int_size_in_bytes (type); + return ((size == -1) || (size > 8)); +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ +static bool +cr16_class_likely_spilled_p (reg_class_t rclass) +{ + if ((rclass) == SHORT_REGS || (rclass) == DOUBLE_BASE_REGS + || (rclass) == LONG_REGS || (rclass) == GENERAL_REGS) + return true; + + return false; +} + +static int +cr16_return_pops_args (tree fundecl ATTRIBUTE_UNUSED, + tree funtype ATTRIBUTE_UNUSED, + int size ATTRIBUTE_UNUSED) +{ + return 0; +} + +/* Returns true if data model selected via command line option + is same as function argument. */ +bool +cr16_is_data_model (enum data_model_type model) +{ + return (model == data_model); +} + +/* Parse relevant options and override. */ +static void +cr16_override_options (void) +{ + /* Disable -fdelete-null-pointer-checks option for CR16 target. + Programs which rely on NULL pointer dereferences _not_ halting the + program may not work properly with this option. So disable this + option. */ + flag_delete_null_pointer_checks = 0; + + /* FIXME: To avoid spill_failure ICE during exception handling, + * disable cse_fllow_jumps. 
The spill error occurs when compiler + * can't find a suitable candidate in GENERAL_REGS class to reload + * a 32bit register. + * Need to find a better way of avoiding this situation. */ + if (flag_exceptions) + flag_cse_follow_jumps = 0; + + /* If -fpic option, data_model == DM_FAR. */ + if (flag_pic == NEAR_PIC) + { + data_model = DM_FAR; + } + + /* The only option we want to examine is data model option. */ + if (cr16_data_model) + { + if (strcmp (cr16_data_model, "medium") == 0) + data_model = DM_DEFAULT; + else if (strcmp (cr16_data_model, "near") == 0) + data_model = DM_NEAR; + else if (strcmp (cr16_data_model, "far") == 0) + { + if (TARGET_CR16CP) + data_model = DM_FAR; + else + error ("data-model=far not valid for cr16c architecture"); + } + else + error ("invalid data model option -mdata-model=%s", cr16_data_model); + } + else + data_model = DM_DEFAULT; +} + +/* Implements the macro TARGET_CONDITIONAL_REGISTER_USAGE. */ +static void +cr16_conditional_register_usage (void) +{ + if (flag_pic) + { + fixed_regs[12] = call_used_regs[12] = 1; + } +} + +/* Stack layout and calling conventions routines. */ + +/* Return nonzero if the current function being compiled is an interrupt + function as specified by the "interrupt" attribute. */ +int +cr16_interrupt_function_p (void) +{ + tree attributes; + + attributes = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + return (lookup_attribute ("interrupt", attributes) != NULL_TREE); +} + +/* Compute values for the array current_frame_info.save_regs and the variable + current_frame_info.reg_size. The index of current_frame_info.save_regs + is numbers of register, each will get 1 if we need to save it in the + current function, 0 if not. current_frame_info.reg_size is the total sum + of the registers being saved. */ +static void +cr16_compute_save_regs (void) +{ + unsigned int regno; + + /* Initialize here so in case the function is no-return it will be -1. */ + current_frame_info.last_reg_to_save = -1; + + /* Initialize the number of bytes to be saved. */ + current_frame_info.reg_size = 0; + + /* No need to save any registers if the function never returns. */ + if (FUNC_IS_NORETURN_P (current_function_decl) && !MUST_SAVE_REGS_P ()) + return; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (fixed_regs[regno]) + { + current_frame_info.save_regs[regno] = 0; + continue; + } + + /* If this reg is used and not call-used (except RA), save it. */ + if (cr16_interrupt_function_p ()) + { + if (!crtl->is_leaf && call_used_regs[regno]) + /* This is a volatile reg in a non-leaf interrupt routine - save + it for the sake of its sons. */ + current_frame_info.save_regs[regno] = 1; + else if (df_regs_ever_live_p (regno)) + /* This reg is used - save it. */ + current_frame_info.save_regs[regno] = 1; + else + /* This reg is not used, and is not a volatile - don't save. */ + current_frame_info.save_regs[regno] = 0; + } + else + { + /* If this reg is used and not call-used (except RA), save it. */ + if (df_regs_ever_live_p (regno) + && (!call_used_regs[regno] || regno == RETURN_ADDRESS_REGNUM)) + current_frame_info.save_regs[regno] = 1; + else + current_frame_info.save_regs[regno] = 0; + } + } + + /* Save registers so the exception handler can modify them. 
*/ + if (crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0;; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (INVALID_REGNUM == regno) + break; + current_frame_info.save_regs[regno] = 1; + } + } + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (current_frame_info.save_regs[regno] == 1) + { + current_frame_info.last_reg_to_save = regno; + if (regno >= CR16_FIRST_DWORD_REGISTER) + current_frame_info.reg_size += CR16_UNITS_PER_DWORD; + else + current_frame_info.reg_size += UNITS_PER_WORD; + } +} + +/* Compute the size of the local area and the size to be adjusted by the + prologue and epilogue. */ +static void +cr16_compute_frame (void) +{ + /* For aligning the local variables. */ + int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT; + int padding_locals; + + /* Padding needed for each element of the frame. */ + current_frame_info.var_size = get_frame_size (); + + /* Align to the stack alignment. */ + padding_locals = current_frame_info.var_size % stack_alignment; + if (padding_locals) + padding_locals = stack_alignment - padding_locals; + + current_frame_info.var_size += padding_locals; + current_frame_info.total_size = current_frame_info.var_size + + (ACCUMULATE_OUTGOING_ARGS + ? crtl->outgoing_args_size : 0); +} + +/* Implements the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ +int +cr16_initial_elimination_offset (int from, int to) +{ + /* Compute this since we need to use current_frame_info.reg_size. */ + cr16_compute_save_regs (); + + /* Compute this since we need to use current_frame_info.var_size. */ + cr16_compute_frame (); + + if (((from) == FRAME_POINTER_REGNUM) && ((to) == STACK_POINTER_REGNUM)) + return (ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0); + else if (((from) == ARG_POINTER_REGNUM) && ((to) == FRAME_POINTER_REGNUM)) + return (current_frame_info.reg_size + current_frame_info.var_size); + else if (((from) == ARG_POINTER_REGNUM) && ((to) == STACK_POINTER_REGNUM)) + return (current_frame_info.reg_size + current_frame_info.var_size + + (ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0)); + else + gcc_unreachable (); +} + +/* Register Usage. */ + +/* Return the class number of the smallest class containing reg number REGNO. + This could be a conditional expression or could index an array. */ +enum reg_class +cr16_regno_reg_class (int regno) +{ + if ((regno >= 0) && (regno < CR16_FIRST_DWORD_REGISTER)) + return SHORT_REGS; + + if ((regno >= CR16_FIRST_DWORD_REGISTER) && (regno < FIRST_PSEUDO_REGISTER)) + return LONG_REGS; + + return NO_REGS; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ +int +cr16_hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + if ((GET_MODE_SIZE (mode) >= 4) && (regno == 11)) + return 0; + + if (mode == DImode || mode == DFmode) + { + if ((regno > 8) || (regno & 1)) + return 0; + return 1; + } + + if ((TARGET_INT32) + && ((regno >= 12) && (GET_MODE_SIZE (mode) < 4 ))) + return 0; + + /* CC can only hold CCmode values. */ + if (GET_MODE_CLASS (mode) == MODE_CC) + return 0; + return 1; +} + +/* Returns register number for function return value.*/ +static inline unsigned int +cr16_ret_register (void) +{ + return 0; +} + +/* Implements hook TARGET_STRUCT_VALUE_RTX. */ +static rtx +cr16_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, cr16_ret_register ()); +} + +/* Returning function value. */ + +/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. 
*/ +static bool +cr16_function_value_regno_p (const unsigned int regno) +{ + return (regno == cr16_ret_register ()); +} + +/* Create an RTX representing the place where a + library function returns a value of mode MODE. */ +static rtx +cr16_libcall_value (enum machine_mode mode, + const_rtx func ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, cr16_ret_register ()); +} + +/* Create an RTX representing the place where a + function returns a value of data type VALTYPE. */ +static rtx +cr16_function_value (const_tree type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (type), cr16_ret_register ()); +} + +/* Passing function arguments. */ + +/* If enough param regs are available for passing the param of type TYPE return + the number of registers needed else 0. */ +static int +enough_regs_for_param (CUMULATIVE_ARGS * cum, const_tree type, + enum machine_mode mode) +{ + int type_size; + int remaining_size; + + if (mode != BLKmode) + type_size = GET_MODE_BITSIZE (mode); + else + type_size = int_size_in_bytes (type) * BITS_PER_UNIT; + + remaining_size = BITS_PER_WORD * (MAX_REG_FOR_PASSING_ARGS + - (MIN_REG_FOR_PASSING_ARGS + cum->ints) + + 1); + + /* Any variable which is too big to pass in two registers, will pass on + stack. */ + if ((remaining_size >= type_size) && (type_size <= 2 * BITS_PER_WORD)) + return (type_size + BITS_PER_WORD - 1) / BITS_PER_WORD; + + return 0; +} + +/* Implements the macro FUNCTION_ARG defined in cr16.h. */ +static rtx +cr16_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + cum->last_parm_in_reg = 0; + + /* function_arg () is called with this type just after all the args have + had their registers assigned. The rtx that function_arg returns from + this type is supposed to pass to 'gen_call' but currently it is not + implemented (see macro GEN_CALL). */ + if (type == void_type_node) + return NULL_RTX; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return NULL_RTX; + + if (mode == BLKmode) + { + /* Enable structures that need padding bytes at the end to pass to a + function in registers. */ + if (enough_regs_for_param (cum, type, mode) != 0) + { + cum->last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + if ((MIN_REG_FOR_PASSING_ARGS + cum->ints) > MAX_REG_FOR_PASSING_ARGS) + return NULL_RTX; + else + { + if (enough_regs_for_param (cum, type, mode) != 0) + { + cum->last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + return NULL_RTX; +} + +/* Implements the macro INIT_CUMULATIVE_ARGS defined in cr16.h. */ +void +cr16_init_cumulative_args (CUMULATIVE_ARGS * cum, tree fntype, + rtx libfunc ATTRIBUTE_UNUSED) +{ + tree param, next_param; + + cum->ints = 0; + + /* Determine if this function has variable arguments. This is indicated by + the last argument being 'void_type_mode' if there are no variable + arguments. Change here for a different vararg. */ + for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; + param != NULL_TREE; param = next_param) + { + next_param = TREE_CHAIN (param); + if ((next_param == NULL_TREE) && (TREE_VALUE (param) != void_type_node)) + { + cum->ints = -1; + return; + } + } +} + +/* Implements the macro FUNCTION_ARG_ADVANCE defined in cr16.h. 
*/ +static void +cr16_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS * cum = get_cumulative_args (cum_v); + + /* l holds the number of registers required. */ + int l = GET_MODE_BITSIZE (mode) / BITS_PER_WORD; + + /* If the parameter isn't passed on a register don't advance cum. */ + if (!cum->last_parm_in_reg) + return; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return; + + if ((mode == SImode) || (mode == HImode) + || (mode == QImode) || (mode == DImode)) + { + if (l <= 1) + cum->ints += 1; + else + cum->ints += l; + } + else if ((mode == SFmode) || (mode == DFmode)) + cum->ints += l; + else if ((mode) == BLKmode) + { + if ((l = enough_regs_for_param (cum, type, mode)) != 0) + cum->ints += l; + } + return; +} + +/* Implements the macro FUNCTION_ARG_REGNO_P defined in cr16.h. + Return nonzero if N is a register used for passing parameters. */ +int +cr16_function_arg_regno_p (int n) +{ + return ((n <= MAX_REG_FOR_PASSING_ARGS) && (n >= MIN_REG_FOR_PASSING_ARGS)); +} + +/* Addressing modes. + Following set of function implement the macro GO_IF_LEGITIMATE_ADDRESS + defined in cr16.h. */ + +/* Helper function to check if is a valid base register that can + hold address. */ +static int +cr16_addr_reg_p (rtx addr_reg) +{ + rtx reg; + + if (REG_P (addr_reg)) + reg = addr_reg; + else if ((GET_CODE (addr_reg) == SUBREG) + && REG_P (SUBREG_REG (addr_reg)) + && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (addr_reg))) + <= UNITS_PER_WORD)) + reg = SUBREG_REG (addr_reg); + else + return FALSE; + + if (GET_MODE (reg) != Pmode) + return FALSE; + + return TRUE; +} + +/* Helper functions: Created specifically for decomposing operand of CONST + Recursively look into expression x for code or data symbol. + The function expects the expression to contain combination of + SYMBOL_REF, CONST_INT, (PLUS or MINUS) + LABEL_REF, CONST_INT, (PLUS or MINUS) + SYMBOL_REF + LABEL_REF + All other combinations will result in code = -1 and data = ILLEGAL_DM + code data + -1 ILLEGAL_DM The expression did not contain SYMBOL_REF or LABEL_REF + 0 DM_FAR SYMBOL_REF was found and it was far data reference. + 0 DM_DEFAULT SYMBOL_REF was found and it was medium data reference. + 1 ILLEGAL_DM LABEL_REF was found. + 2 ILLEGAL_DM SYMBOL_REF was found and it was function reference. */ +void +cr16_decompose_const (rtx x, int *code, enum data_model_type *data, + bool treat_as_const) +{ + *code = -1; + *data = ILLEGAL_DM; + switch (GET_CODE (x)) + { + case SYMBOL_REF: + *code = SYMBOL_REF_FUNCTION_P (x) ? 2 : 0; + /* 2 indicates func sym. */ + if (*code == 0) + { + if (CR16_TARGET_DATA_NEAR) + *data = DM_DEFAULT; + else if (CR16_TARGET_DATA_MEDIUM) + *data = DM_FAR; + else if (CR16_TARGET_DATA_FAR) + { + if (treat_as_const) + /* This will be used only for printing + the qualifier. This call is (may be) + made by cr16_print_operand_address. */ + *data = DM_FAR; + else + /* This call is (may be) made by + cr16_legitimate_address_p. */ + *data = ILLEGAL_DM; + } + } + return; + + case LABEL_REF: + /* 1 - indicates non-function symbol. */ + *code = 1; + return; + + case PLUS: + case MINUS: + /* Look into the tree nodes. 
*/ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + cr16_decompose_const (XEXP (x, 1), code, data, treat_as_const); + else if (GET_CODE (XEXP (x, 1)) == CONST_INT) + cr16_decompose_const (XEXP (x, 0), code, data, treat_as_const); + return; + default: + return; + } +} + +/* Decompose Address + This function decomposes the address returns the type of address + as defined in enum cr16_addrtype. It also fills the parameter *out. + The decomposed address can be used for two purposes. One to + check if the address is valid and second to print the address + operand. + + Following tables list valid address supported in CR16C/C+ architectures. + Legend: + aN : Absoulte address N-bit address + R : One 16-bit register + RP : Consecutive two 16-bit registers or one 32-bit register + I : One 32-bit register + dispN : Signed displacement of N-bits + + ----Code addresses---- + Branch operands: + disp9 : CR16_ABSOLUTE (disp) + disp17 : CR16_ABSOLUTE (disp) + disp25 : CR16_ABSOLUTE (disp) + RP + disp25 : CR16_REGP_REL (base, disp) + + Jump operands: + RP : CR16_REGP_REL (base, disp=0) + a24 : CR16_ABSOLUTE (disp) + + ----Data addresses---- + a20 : CR16_ABSOLUTE (disp) near (1M) + a24 : CR16_ABSOLUTE (disp) medium (16M) + R + d20 : CR16_REG_REL (base, disp) near (1M+64K) + RP + d4 : CR16_REGP_REL (base, disp) far (4G) + RP + d16 : CR16_REGP_REL (base, disp) far (4G) + RP + d20 : CR16_REGP_REL (base, disp) far (4G) + I : *** Valid but port does not support this + I + a20 : *** Valid but port does not support this + I + RP + d14: CR16_INDEX_REGP_REL (base, index, disp) far (4G) + I + RP + d20: CR16_INDEX_REGP_REL (base, index, disp) far (4G) + + Decomposing Data model in case of absolute address. + + Target Option Address type Resultant Data ref type + ---------------------- ------------ ----------------------- + CR16_TARGET_MODEL_NEAR ABS20 DM_DEFAULT + CR16_TARGET_MODEL_NEAR IMM20 DM_DEFAULT + CR16_TARGET_MODEL_NEAR ABS24 Invalid + CR16_TARGET_MODEL_NEAR IMM32 Invalid + + CR16_TARGET_MODEL_MEDIUM ABS20 DM_DEFAULT + CR16_TARGET_MODEL_MEDIUM IMM20 DM_DEFAULT + CR16_TARGET_MODEL_MEDIUM ABS24 DM_FAR + CR16_TARGET_MODEL_MEDIUM IMM32 Invalid + + CR16_TARGET_MODEL_FAR ABS20 DM_DEFAULT + CR16_TARGET_MODEL_FAR IMM20 DM_DEFAULT + CR16_TARGET_MODEL_FAR ABS24 DM_FAR + CR16_TARGET_MODEL_FAR IMM32 DM_FAR. */ +enum cr16_addrtype +cr16_decompose_address (rtx addr, struct cr16_address *out, + bool debug_print, bool treat_as_const) +{ + rtx base = NULL_RTX, disp = NULL_RTX, index = NULL_RTX; + enum data_model_type data = ILLEGAL_DM; + int code = -1; + enum cr16_addrtype retval = CR16_INVALID; + + switch (GET_CODE (addr)) + { + case CONST_INT: + /* Absolute address (known at compile time). */ + code = 0; + if (debug_print) + fprintf (stderr, "\ncode:%d", code); + disp = addr; + + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + + if (UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + { + data = DM_DEFAULT; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + retval = CR16_ABSOLUTE; + } + else if (UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 24)) + { + if (!CR16_TARGET_DATA_NEAR) + { + data = DM_FAR; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + retval = CR16_ABSOLUTE; + } + else + return CR16_INVALID; /* ABS24 is not support in NEAR model. */ + } + else + return CR16_INVALID; + break; + + case CONST: + /* A CONST is an expression of PLUS or MINUS with + CONST_INT, SYMBOL_REF or LABEL_REF. This is the + result of assembly-time arithmetic computation. 
*/ + retval = CR16_ABSOLUTE; + disp = addr; + /* Call the helper function to check the validity. */ + cr16_decompose_const (XEXP (addr, 0), &code, &data, treat_as_const); + if ((code == 0) && (data == ILLEGAL_DM)) + /* CONST is not valid code or data address. */ + return CR16_INVALID; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + fprintf (stderr, "\ndata:%d", data); + } + break; + + case LABEL_REF: + retval = CR16_ABSOLUTE; + disp = addr; + /* 1 - indicates non-function symbol. */ + code = 1; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + } + break; + + case SYMBOL_REF: + /* Absolute address (known at link time). */ + retval = CR16_ABSOLUTE; + disp = addr; + /* This is a code address if symbol_ref is a function. */ + /* 2 indicates func sym. */ + code = SYMBOL_REF_FUNCTION_P (addr) ? 2 : 0; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + } + /* If not function ref then check if valid data ref. */ + if (code == 0) + { + if (CR16_TARGET_DATA_NEAR) + data = DM_DEFAULT; + else if (CR16_TARGET_DATA_MEDIUM) + data = DM_FAR; + else if (CR16_TARGET_DATA_FAR) + { + if (treat_as_const) + /* This will be used only for printing the + qualifier. This call is (may be) made + by cr16_print_operand_address. */ + data = DM_FAR; + else + /* This call is (may be) made by + cr16_legitimate_address_p. */ + return CR16_INVALID; + } + else + data = DM_DEFAULT; + } + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + break; + + case REG: + case SUBREG: + /* Register relative address. */ + /* Assume REG fits in a single register. */ + retval = CR16_REG_REL; + if (GET_MODE_BITSIZE (GET_MODE (addr)) > BITS_PER_WORD) + if (!LONG_REG_P (REGNO (addr))) + /* REG will result in reg pair. */ + retval = CR16_REGP_REL; + base = addr; + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (addr, 0))) + { + case REG: + case SUBREG: + /* REG + DISP20. */ + /* All Reg relative addresses having a displacement needs + to fit in 20-bits. */ + disp = XEXP (addr, 1); + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + switch (GET_CODE (XEXP (addr, 1))) + { + case CONST_INT: + /* Shall fit in 20-bits. */ + if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + return CR16_INVALID; + code = 0; + if (debug_print) + fprintf (stderr, "\ncode:%d", code); + break; + + case UNSPEC: + switch (XINT (XEXP (addr, 1), 1)) + { + case UNSPEC_LIBRARY_OFFSET: + default: + gcc_unreachable (); + } + break; + + case LABEL_REF: + case SYMBOL_REF: + case CONST: + /* This is also a valid expression for address. + However, we cannot ascertain if the resultant + displacement will be valid 20-bit value. Therefore, + lets not allow such an expression for now. This will + be updated when we find a way to validate this + expression as legitimate address. + Till then fall through CR16_INVALID. */ + default: + return CR16_INVALID; + } + + /* Now check if REG can fit into single or pair regs. */ + retval = CR16_REG_REL; + base = XEXP (addr, 0); + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + if (GET_MODE_BITSIZE (GET_MODE ((XEXP (addr, 0)))) > BITS_PER_WORD) + { + if (!LONG_REG_P (REGNO ((XEXP (addr, 0))))) + /* REG will result in reg pair. 
*/ + retval = CR16_REGP_REL; + } + break; + + case PLUS: + /* Valid expr: + plus + /\ + / \ + plus idx + /\ + / \ + reg const_int + + Check if the operand 1 is valid index register. */ + data = ILLEGAL_DM; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + switch (GET_CODE (XEXP (addr, 1))) + { + case REG: + case SUBREG: + if (!REG_OK_FOR_INDEX_P (XEXP (addr, 1))) + return CR16_INVALID; + /* OK. REG is a valid index register. */ + index = XEXP (addr, 1); + if (debug_print) + { + fprintf (stderr, "\nindex:"); + debug_rtx (index); + } + break; + default: + return CR16_INVALID; + } + /* Check if operand 0 of operand 0 is REGP. */ + switch (GET_CODE (XEXP (XEXP (addr, 0), 0))) + { + case REG: + case SUBREG: + /* Now check if REG is a REGP and not in LONG regs. */ + if (GET_MODE_BITSIZE (GET_MODE (XEXP (XEXP (addr, 0), 0))) + > BITS_PER_WORD) + { + if (REGNO (XEXP (XEXP (addr, 0), 0)) + >= CR16_FIRST_DWORD_REGISTER) + return CR16_INVALID; + base = XEXP (XEXP (addr, 0), 0); + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + } + else + return CR16_INVALID; + break; + default: + return CR16_INVALID; + } + /* Now check if the operand 1 of operand 0 is const_int. */ + if (GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) + { + disp = XEXP (XEXP (addr, 0), 1); + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + return CR16_INVALID; + } + else + return CR16_INVALID; + retval = CR16_INDEX_REGP_REL; + break; + default: + return CR16_INVALID; + } + break; + + default: + return CR16_INVALID; + } + + /* Check if the base and index registers are valid. */ + if (base && !(cr16_addr_reg_p (base))) + return CR16_INVALID; + if (base && !(CR16_REG_OK_FOR_BASE_P (base))) + return CR16_INVALID; + if (index && !(REG_OK_FOR_INDEX_P (index))) + return CR16_INVALID; + + /* Write the decomposition to out parameter. */ + out->base = base; + out->disp = disp; + out->index = index; + out->data = data; + out->code = code; + + return retval; +} + +/* Return non-zero value if 'x' is legitimate PIC operand + when generating PIC code. */ +int +legitimate_pic_operand_p (rtx x) +{ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + return 0; + break; + case LABEL_REF: + return 0; + break; + case CONST: + /* REVISIT: Use something like symbol_referenced_p. */ + if (GET_CODE (XEXP (x, 0)) == PLUS + && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF) + && (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)) + return 0; + break; + case MEM: + return legitimate_pic_operand_p (XEXP (x, 0)); + break; + default: + break; + } + return 1; +} + +/* Convert a non-PIC address in `orig' to a PIC address in `reg'. + + Input Output (-f pic) Output (-f PIC) + orig reg + + C1 symbol symbol@BRO (r12) symbol@GOT (r12) + + C2 symbol + offset symbol+offset@BRO (r12) symbol+offset@GOT (r12) + + NOTE: @BRO is added using unspec:BRO + NOTE: @GOT is added using unspec:GOT. */ +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED, + rtx reg) +{ + /* First handle a simple SYMBOL_REF or LABEL_REF. */ + if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF) + { + if (reg == 0) + reg = gen_reg_rtx (Pmode); + + if (flag_pic == NEAR_PIC) + { + /* Unspec to handle -fpic option. */ + emit_insn (gen_unspec_bro_addr (reg, orig)); + emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx)); + } + else if (flag_pic == FAR_PIC) + { + /* Unspec to handle -fPIC option. 
*/ + emit_insn (gen_unspec_got_addr (reg, orig)); + } + return reg; + } + else if (GET_CODE (orig) == CONST) + { + /* To handle (symbol + offset). */ + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + /* REVISIT: Optimize for const-offsets. */ + emit_insn (gen_addsi3 (reg, base, offset)); + + return reg; + } + return orig; +} + +/* Implementation of TARGET_LEGITIMATE_ADDRESS_P. */ +static bool +cr16_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx addr, bool strict) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\n======\nTARGET_LEGITIMATE_ADDRESS_P, mode = %s, strict = %d", + GET_MODE_NAME (mode), strict); + debug_rtx (addr); + } + addrtype = cr16_decompose_address (addr, &address, + (TARGET_DEBUG_ADDR ? 1 : 0), FALSE); + + if (TARGET_DEBUG_ADDR) + { + const char *typestr; + + switch (addrtype) + { + case CR16_INVALID: + typestr = "invalid"; + break; + case CR16_ABSOLUTE: + typestr = "absolute"; + break; + case CR16_REG_REL: + typestr = "register relative"; + break; + case CR16_REGP_REL: + typestr = "register pair relative"; + break; + case CR16_INDEX_REGP_REL: + typestr = "index + register pair relative"; + break; + default: + gcc_unreachable (); + } + fprintf (stderr, "\ncr16 address type: %s\n", typestr); + } + + if (addrtype == CR16_INVALID) + return FALSE; + + if (strict) + { + if (address.base + && !REGNO_MODE_OK_FOR_BASE_P (REGNO (address.base), mode)) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "base register not strict\n"); + return FALSE; + } + if (address.index && !REGNO_OK_FOR_INDEX_P (REGNO (address.index))) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "index register not strict\n"); + return FALSE; + } + } + + /* Return true if addressing mode is register relative. */ + if (flag_pic) + { + if (addrtype == CR16_REG_REL || addrtype == CR16_REGP_REL) + return TRUE; + else + return FALSE; + } + + return TRUE; +} + +/* Routines to compute costs. */ + +/* Return cost of the memory address x. */ +static int +cr16_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + int cost = 2; + + addrtype = cr16_decompose_address (addr, &address, 0, FALSE); + + gcc_assert (addrtype != CR16_INVALID); + + /* CR16_ABSOLUTE : 3 + CR16_REG_REL (disp !=0) : 4 + CR16_REG_REL (disp ==0) : 5 + CR16_REGP_REL (disp !=0) : 6 + CR16_REGP_REL (disp ==0) : 7 + CR16_INDEX_REGP_REL (disp !=0) : 8 + CR16_INDEX_REGP_REL (disp ==0) : 9. */ + switch (addrtype) + { + case CR16_ABSOLUTE: + cost += 1; + break; + case CR16_REGP_REL: + cost += 2; + /* Fall through. */ + case CR16_REG_REL: + cost += 3; + if (address.disp) + cost -= 1; + break; + case CR16_INDEX_REGP_REL: + cost += 7; + if (address.disp) + cost -= 1; + default: + break; + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n======\nmacro TARGET_ADDRESS_COST = %d\n", cost); + debug_rtx (addr); + } + + return cost; +} + + +/* Implement `TARGET_REGISTER_MOVE_COST'. 
*/ +static int +cr16_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from ATTRIBUTE_UNUSED, reg_class_t to) +{ + return (to != GENERAL_REGS ? 8 : 2); +} + +/* Implement `TARGET_MEMORY_MOVE_COST'. */ + +/* Return the cost of moving data of mode MODE between a register of class + CLASS and memory; IN is zero if the value is to be written to memory, + nonzero if it is to be read in. This cost is relative to those in + REGISTER_MOVE_COST. */ +static int +cr16_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + /* One LD or ST takes twice the time of a simple reg-reg move. */ + if (reg_classes_intersect_p (rclass, GENERAL_REGS)) + return (4 * HARD_REGNO_NREGS (0, mode)); + else + return (100); +} + +/* Instruction output. */ + +/* Check if a const_double is ok for cr16 store-immediate instructions. */ +int +cr16_const_double_ok (rtx op) +{ + if (GET_MODE (op) == SFmode) + { + REAL_VALUE_TYPE r; + long l; + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + return UNSIGNED_INT_FITS_N_BITS (l, 4) ? 1 : 0; + } + + return ((UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_LOW (op), 4)) && + (UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_HIGH (op), 4))) ? 1 : 0; +} + +/* Returns bit position of first 0 or 1 bit. + It is safe to assume val as 16-bit wide. */ +int +cr16_operand_bit_pos (int val, int bitval) +{ + int i; + if (bitval == 0) + val = ~val; + + for (i = 0; i < 16; i++) + if (val & (1 << i)) + break; + return i; +} + +/* Implements the macro PRINT_OPERAND defined in cr16.h. */ +static void +cr16_print_operand (FILE * file, rtx x, int code) +{ + int ptr_dereference = 0; + + switch (code) + { + case 'd': + { + const char *cr16_cmp_str; + switch (GET_CODE (x)) + { + /* MD: compare (reg, reg or imm) but CR16: cmp (reg or imm, reg) + -> swap all non symmetric ops. */ + case EQ: + cr16_cmp_str = "eq"; + break; + case NE: + cr16_cmp_str = "ne"; + break; + case GT: + cr16_cmp_str = "lt"; + break; + case GTU: + cr16_cmp_str = "lo"; + break; + case LT: + cr16_cmp_str = "gt"; + break; + case LTU: + cr16_cmp_str = "hi"; + break; + case GE: + cr16_cmp_str = "le"; + break; + case GEU: + cr16_cmp_str = "ls"; + break; + case LE: + cr16_cmp_str = "ge"; + break; + case LEU: + cr16_cmp_str = "hs"; + break; + default: + gcc_unreachable (); + } + fprintf (file, "%s", cr16_cmp_str); + return; + } + case '$': + putc ('$', file); + return; + + case 'p': + if (GET_CODE (x) == REG) + { + /* For Push instructions, we should not print register pairs. */ + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + } + break; + + case 'b': + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. */ + fprintf (file, "0x%lx", INTVAL (x)); + return; + + case 'r': + /* Print bit position of first 0. */ + fprintf (file, "%d", cr16_operand_bit_pos (INTVAL (x), 0)); + return; + + case 's': + /* Print bit position of first 1. */ + fprintf (file, "%d", cr16_operand_bit_pos (INTVAL (x), 1)); + return; + case 'g': + /* 'g' is used for implicit mem: dereference. */ + ptr_dereference = 1; + case 'f': + case 0: + /* default. 
*/ + switch (GET_CODE (x)) + { + case REG: + if (GET_MODE_BITSIZE (GET_MODE (x)) > BITS_PER_WORD) + { + if (LONG_REG_P (REGNO (x))) + fprintf (file, "(%s)", reg_names[REGNO (x)]); + else + fprintf (file, "(%s,%s)", reg_names[REGNO (x) + 1], + reg_names[REGNO (x)]); + } + else + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + + case MEM: + output_address (XEXP (x, 0)); + return; + + case CONST_DOUBLE: + { + REAL_VALUE_TYPE r; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "$0x%lx", l); + return; + } + case CONST_INT: + { + fprintf (file, "$%ld", INTVAL (x)); + return; + } + case UNSPEC: + switch (XINT (x, 1)) + { + default: + gcc_unreachable (); + } + break; + + default: + if (!ptr_dereference) + { + putc ('$', file); + } + cr16_print_operand_address (file, x); + return; + } + default: + output_operand_lossage ("invalid %%xn code"); + } + + gcc_unreachable (); +} + +/* Implements the macro PRINT_OPERAND_ADDRESS defined in cr16.h. */ + +static void +cr16_print_operand_address (FILE * file, rtx addr) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + + /* Decompose the address. Also ask it to treat address as constant. */ + addrtype = cr16_decompose_address (addr, &address, 0, TRUE); + + if (address.disp && GET_CODE (address.disp) == UNSPEC) + { + debug_rtx (addr); + } + + switch (addrtype) + { + case CR16_REG_REL: + if (address.disp) + { + if (GET_CODE (address.disp) == UNSPEC) + cr16_print_operand (file, address.disp, 0); + else + output_addr_const (file, address.disp); + } + else + fprintf (file, "0"); + fprintf (file, "(%s)", reg_names[REGNO (address.base)]); + break; + + case CR16_ABSOLUTE: + if (address.disp) + output_addr_const (file, address.disp); + else + fprintf (file, "0"); + break; + + case CR16_INDEX_REGP_REL: + fprintf (file, "[%s]", reg_names[REGNO (address.index)]); + /* Fall through. */ + case CR16_REGP_REL: + if (address.disp) + { + if (GET_CODE (address.disp) == UNSPEC) + cr16_print_operand (file, address.disp, 0); + else + output_addr_const (file, address.disp); + } + else + fprintf (file, "0"); + fprintf (file, "(%s,%s)", reg_names[REGNO (address.base) + 1], + reg_names[REGNO (address.base)]); + break; + default: + debug_rtx (addr); + gcc_unreachable (); + } + /* Add qualifiers to the address expression that was just printed. */ + if (flag_pic < NEAR_PIC && address.code == 0) + { + if (address.data == DM_FAR) + /* Addr contains SYMBOL_REF & far data ptr. */ + fprintf (file, "@l"); + else if (address.data == DM_DEFAULT) + /* Addr contains SYMBOL_REF & medium data ptr. */ + fprintf (file, "@m"); + /* Addr contains SYMBOL_REF & medium data ptr. */ + else if (address.data == DM_NEAR) + /* Addr contains SYMBOL_REF & near data ptr. */ + fprintf (file, "@s"); + } + else if (flag_pic == NEAR_PIC + && (address.code == 0) && (address.data == DM_FAR + || address.data == DM_DEFAULT + || address.data == DM_NEAR)) + { + fprintf (file, "@l"); + } + else if (flag_pic == NEAR_PIC && address.code == 2) + { + fprintf (file, "pic"); + } + else if (flag_pic == NEAR_PIC && address.code == 1) + { + fprintf (file, "@cpic"); + } + + else if (flag_pic == FAR_PIC && address.code == 2) + { + /* REVISIT: cr16 register indirect jump expects a 1-bit right shifted + address ! GOTc tells assembler this symbol is a text-address + This needs to be fixed in such a way that this offset is done + only in the case where an address is being used for indirect jump + or call. 
Determining the potential usage of loadd is of course not + possible always. Eventually, this has to be fixed in the + processor. */ + fprintf (file, "GOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } + else if (flag_pic == FAR_PIC && address.code == 1) + { + fprintf (file, "@cGOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } + + else if (flag_pic == FAR_PIC && + (address.data == DM_FAR || address.data == DM_DEFAULT + || address.data == DM_NEAR)) + { + fprintf (file, "@GOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } +} + +/* Machine description helper functions. */ + +/* Called from cr16.md. The return value depends on the parameter push_or_pop: + When push_or_pop is zero -> string for push instructions of prologue. + When push_or_pop is nonzero -> string for pop/popret/retx in epilogue. + Relies on the assumptions: + 1. RA is the last register to be saved. + 2. The maximal value of the counter is MAX_COUNT. */ +char * +cr16_prepare_push_pop_string (int push_or_pop) +{ + /* j is the number of registers being saved, takes care that there won't be + more than 8 in one push/pop instruction. */ + + /* For the register mask string. */ + static char one_inst_str[50]; + + /* i is the index of current_frame_info.save_regs[], going from 0 until + current_frame_info.last_reg_to_save. */ + int i, start_reg; + int word_cnt; + int print_ra; + char *return_str; + + /* For reversing on the push instructions if there are more than one. */ + char *temp_str; + + return_str = (char *) xmalloc (160); + temp_str = (char *) xmalloc (160); + + /* Initialize. */ + memset (return_str, 0, 3); + + i = 0; + while (i <= current_frame_info.last_reg_to_save) + { + /* Prepare mask for one instruction. */ + one_inst_str[0] = 0; + + /* To count number of words in one instruction. */ + word_cnt = 0; + start_reg = i; + print_ra = 0; + while ((word_cnt < MAX_COUNT) + && (i <= current_frame_info.last_reg_to_save)) + { + /* For each non consecutive save register, + a new instruction shall be generated. */ + if (!current_frame_info.save_regs[i]) + { + /* Move to next reg and break. */ + ++i; + break; + } + + if (i == RETURN_ADDRESS_REGNUM) + print_ra = 1; + else + { + /* Check especially if adding 2 does not cross the MAX_COUNT. */ + if ((word_cnt + ((i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2)) + >= MAX_COUNT) + break; + /* Increase word count by 2 for long registers except RA. */ + word_cnt += ((i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2); + } + ++i; + } + + /* No need to generate any instruction as + no register or RA needs to be saved. */ + if ((word_cnt == 0) && (print_ra == 0)) + continue; + + /* Now prepare the instruction operands. */ + if (word_cnt > 0) + { + sprintf (one_inst_str, "$%d, %s", word_cnt, reg_names[start_reg]); + if (print_ra) + strcat (one_inst_str, ", ra"); + } + else + strcat (one_inst_str, "ra"); + + if (push_or_pop == 1) + { + /* Pop instruction. */ + if (print_ra && !cr16_interrupt_function_p () + && !crtl->calls_eh_return) + /* Print popret if RA is saved and its not a interrupt + function. */ + strcpy (temp_str, "\n\tpopret\t"); + else + strcpy (temp_str, "\n\tpop\t"); + + strcat (temp_str, one_inst_str); + + /* Add the pop instruction list. */ + strcat (return_str, temp_str); + } + else + { + /* Push instruction. */ + strcpy (temp_str, "\n\tpush\t"); + strcat (temp_str, one_inst_str); + + /* We need to reverse the order of the instructions if there + are more than one. (since the pop will not be reversed in + the epilogue. 
*/ + strcat (temp_str, return_str); + strcpy (return_str, temp_str); + } + } + + if (push_or_pop == 1) + { + /* POP. */ + if (cr16_interrupt_function_p ()) + strcat (return_str, "\n\tretx\n"); + else if (crtl->calls_eh_return) + { + /* Add stack adjustment before returning to exception handler + NOTE: EH_RETURN_STACKADJ_RTX must refer to (r5, r4). */ + strcat (return_str, "\n\taddd\t (r5, r4), (sp)\t\n"); + strcat (return_str, "\n\tjump\t (ra)\n"); + + /* But before anything else, undo the adjustment addition done in + cr16_expand_epilogue (). */ + strcpy (temp_str, "\n\tsubd\t (r5, r4), (sp)\t\n"); + strcat (temp_str, return_str); + strcpy (return_str, temp_str); + } + else if (!FUNC_IS_NORETURN_P (current_function_decl) + && !(current_frame_info.save_regs[RETURN_ADDRESS_REGNUM])) + strcat (return_str, "\n\tjump\t (ra)\n"); + } + + /* Skip the newline and the tab in the start of return_str. */ + return_str += 2; + return return_str; +} + + +/* Generate DWARF2 annotation for multi-push instruction. */ +static void +cr16_create_dwarf_for_multi_push (rtx insn) +{ + rtx dwarf, reg, tmp; + int i, j, from, to, word_cnt, dwarf_par_index, inc; + enum machine_mode mode; + int num_regs = 0, offset = 0, split_here = 0, total_push_bytes = 0; + + for (i = 0; i <= current_frame_info.last_reg_to_save; ++i) + { + if (current_frame_info.save_regs[i]) + { + ++num_regs; + if (i < CR16_FIRST_DWORD_REGISTER) + total_push_bytes += 2; + else + total_push_bytes += 4; + } + } + + if (!num_regs) + return; + + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + dwarf_par_index = num_regs; + + from = current_frame_info.last_reg_to_save + 1; + to = current_frame_info.last_reg_to_save; + word_cnt = 0; + + for (i = current_frame_info.last_reg_to_save; i >= 0;) + { + if (!current_frame_info.save_regs[i] || 0 == i || split_here) + { + /* This block of regs is pushed in one instruction. */ + if (0 == i && current_frame_info.save_regs[i]) + from = 0; + + for (j = to; j >= from; --j) + { + if (j < CR16_FIRST_DWORD_REGISTER) + { + mode = HImode; + inc = 1; + } + else + { + mode = SImode; + inc = 2; + } + reg = gen_rtx_REG (mode, j); + offset += 2 * inc; + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (mode, + plus_constant + (Pmode, stack_pointer_rtx, + total_push_bytes - offset)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index--) = tmp; + } + from = i; + to = --i; + split_here = 0; + word_cnt = 0; + continue; + } + + if (i != RETURN_ADDRESS_REGNUM) + { + inc = (i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2; + if (word_cnt + inc >= MAX_COUNT || FRAME_POINTER_REGNUM == i) + { + split_here = 1; + from = i; + continue; + } + word_cnt += inc; + } + + from = i--; + } + + tmp = gen_rtx_SET (SImode, stack_pointer_rtx, + gen_rtx_PLUS (SImode, stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); +} + +/* +CompactRISC CR16 Architecture stack layout: + + 0 +--------------------- + | + . + . + | + +==================== Sp (x) = Ap (x+1) + A | Args for functions + | | called by X and Dynamically + | | Dynamic allocations allocated and + | | (alloca, variable deallocated + Stack | length arrays). 
+ grows +-------------------- Fp (x) + down| | Local variables of X + ward| +-------------------- + | | Regs saved for X-1 + | +==================== Sp (x-1) = Ap (x) + | Args for func X + | pushed by X-1 + +-------------------- Fp (x-1) + | + | + V +*/ +void +cr16_expand_prologue (void) +{ + rtx insn; + + cr16_compute_frame (); + cr16_compute_save_regs (); + + /* If there is no need in push and adjustment to sp, return. */ + if ((current_frame_info.total_size + current_frame_info.reg_size) == 0) + return; + + if (current_frame_info.last_reg_to_save != -1) + { + /* If there are registers to push. */ + insn = emit_insn (gen_push_for_prologue + (GEN_INT (current_frame_info.reg_size))); + cr16_create_dwarf_for_multi_push (insn); + RTX_FRAME_RELATED_P (insn) = 1; + } + + + if (current_frame_info.total_size > 0) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-current_frame_info.total_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (frame_pointer_needed) + { + /* Initialize the frame pointer with the value of the stack pointer + pointing now to the locals. */ + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + } +} + +/* Generate insn that updates the stack for local variables and padding + for registers we save. - Generate the appropriate return insn. */ +void +cr16_expand_epilogue (void) +{ + rtx insn; + + /* Nonzero if we need to return and pop only RA. This will generate a + different insn. This differentiate is for the peepholes for call as + last statement in function. */ + int only_popret_RA = (current_frame_info.save_regs[RETURN_ADDRESS_REGNUM] + && (current_frame_info.reg_size + == CR16_UNITS_PER_DWORD)); + + if (frame_pointer_needed) + { + /* Restore the stack pointer with the frame pointers value. */ + insn = emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + + if (current_frame_info.total_size > 0) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (current_frame_info.total_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->calls_eh_return) + { + /* Add this here so that (r5, r4) is actually loaded with the adjustment + value; otherwise, the load might be optimized away... + NOTE: remember to subtract the adjustment before popping the regs + and add it back before returning. */ + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + } + + if (cr16_interrupt_function_p ()) + { + insn = emit_jump_insn (gen_interrupt_return ()); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (crtl->calls_eh_return) + { + /* Special case, pop what's necessary, adjust SP and jump to (RA). */ + insn = emit_jump_insn (gen_pop_and_popret_return + (GEN_INT (current_frame_info.reg_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (current_frame_info.last_reg_to_save == -1) + /* Nothing to pop. */ + /* Don't output jump for interrupt routine, only retx. */ + emit_jump_insn (gen_jump_return ()); + else if (only_popret_RA) + { + insn = emit_jump_insn (gen_popret_RA_return ()); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_jump_insn (gen_pop_and_popret_return + (GEN_INT (current_frame_info.reg_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Implements FRAME_POINTER_REQUIRED. 
*/ +static bool +cr16_frame_pointer_required (void) +{ + return (cfun->calls_alloca || crtl->calls_eh_return + || cfun->has_nonlocal_label || crtl->calls_eh_return); +} + +static bool +cr16_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true); +} + + +/* A C compound statement that attempts to replace X with + a valid memory address for an operand of mode MODE. WIN + will be a C statement label elsewhere in the code. + X will always be the result of a call to break_out_memory_refs (), + and OLDX will be the operand that was given to that function to + produce X. + The code generated by this macro should not alter the + substructure of X. If it transforms X into a more legitimate form, + it should assign X (which will always be a C variable) a new value. */ +static rtx +cr16_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (flag_pic) + return legitimize_pic_address (orig_x, mode, NULL_RTX); + else + return x; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P + Nonzero if X is a legitimate constant for an immediate + operand on the target machine. You can assume that X + satisfies CONSTANT_P. In cr16c treat legitimize float + constant as an immediate operand. */ +static bool +cr16_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + return 1; +} + +void +notice_update_cc (rtx exp) +{ + if (GET_CODE (exp) == SET) + { + /* Jumps do not alter the cc's. */ + if (SET_DEST (exp) == pc_rtx) + return; + + /* Moving register or memory into a register: + it doesn't alter the cc's, but it might invalidate + the RTX's which we remember the cc's came from. + (Note that moving a constant 0 or 1 MAY set the cc's). */ + if (REG_P (SET_DEST (exp)) + && (REG_P (SET_SRC (exp)) || GET_CODE (SET_SRC (exp)) == MEM)) + { + return; + } + + /* Moving register into memory doesn't alter the cc's. + It may invalidate the RTX's which we remember the cc's came from. */ + if (GET_CODE (SET_DEST (exp)) == MEM && REG_P (SET_SRC (exp))) + { + return; + } + } + + CC_STATUS_INIT; + return; +} + +static enum machine_mode +cr16_unwind_word_mode (void) +{ + return SImode; +} + +/* Helper function for md file. This function is used to emit arithmetic + DI instructions. The argument "num" decides which instruction to be + printed. */ +const char * +cr16_emit_add_sub_di (rtx *operands, enum rtx_code code) +{ + rtx lo_op[2] ; + rtx hi0_op[2] ; + rtx hi1_op[2] ; + + lo_op[0] = gen_lowpart (SImode, operands[0]); + hi0_op[0] = simplify_gen_subreg (HImode, operands[0], DImode, 4); + hi1_op[0] = simplify_gen_subreg (HImode, operands[0], DImode, 6); + + lo_op[1] = gen_lowpart (SImode, operands[2]); + hi0_op[1] = simplify_gen_subreg (HImode, operands[2], DImode, 4); + hi1_op[1] = simplify_gen_subreg (HImode, operands[2], DImode, 6); + + switch (code) + { + case PLUS: + { + output_asm_insn ("addd\t%1, %0", lo_op) ; + output_asm_insn ("addcw\t%1, %0", hi0_op) ; + output_asm_insn ("addcw\t%1, %0", hi1_op) ; + break; + } + case MINUS: + { + output_asm_insn ("subd\t%1, %0", lo_op) ; + output_asm_insn ("subcw\t%1, %0", hi0_op) ; + output_asm_insn ("subcw\t%1, %0", hi1_op) ; + break; + } + default: + break; + } + + return ""; +} + + +/* Helper function for md file. This function is used to emit logical + DI instructions. The argument "num" decides which instruction to be + printed. 
*/ +const char * +cr16_emit_logical_di (rtx *operands, enum rtx_code code) +{ + rtx lo_op[2] ; + rtx hi_op[2] ; + + lo_op[0] = gen_lowpart (SImode, operands[0]); + hi_op[0] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + + lo_op[1] = gen_lowpart (SImode, operands[2]); + hi_op[1] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + + switch (code) + { + case AND: + { + output_asm_insn ("andd\t%1, %0", lo_op) ; + output_asm_insn ("andd\t%1, %0", hi_op) ; + return ""; + } + case IOR: + { + output_asm_insn ("ord\t%1, %0", lo_op) ; + output_asm_insn ("ord\t%1, %0", hi_op) ; + return ""; + } + case XOR: + { + output_asm_insn ("xord\t%1, %0", lo_op) ; + output_asm_insn ("xord\t%1, %0", hi_op) ; + return ""; + } + default: + break; + } + + return ""; +} + +/* Initialize 'targetm' variable which contains pointers to functions + and data relating to the target machine. */ + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/cr16/cr16.h b/gcc-4.9/gcc/config/cr16/cr16.h new file mode 100644 index 000000000..c40f7940a --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.h @@ -0,0 +1,586 @@ +/* Definitions of target machine for GNU compiler, for CR16. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_CR16_H +#define GCC_CR16_H + +#define OBJECT_FORMAT_ELF + +/* Controlling the driver. */ + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt1.o%s crti.o%s crtbegin.o%s crtlibid.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef MATH_LIBRARY +#define MATH_LIBRARY "" + +#undef LIB_SPEC +#define LIB_SPEC "-( -lc %{msim*:-lsim}%{!msim*:-lnosys} -) \ +%{msim*:%{!T*:-Tsim.ld}} \ +%{!T*:%{!msim*: %{-Telf32cr16.x}}}" + +/* Run-time target specification. */ +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ +do \ + { \ + builtin_define ("__CR__"); \ + builtin_define ("__CR16__"); \ + builtin_define ("__CR16C__"); \ + if (TARGET_CR16CP) \ + builtin_define ("__CR16CP__"); \ + else \ + builtin_define ("__CR16CSTD__"); \ + if (CR16_TARGET_DATA_NEAR) \ + builtin_define ("__DATA_NEAR__"); \ + if (CR16_TARGET_DATA_MEDIUM) \ + builtin_define ("__DATA_MEDIUM__"); \ + if (CR16_TARGET_DATA_FAR) \ + builtin_define ("__DATA_FAR__"); \ + if (TARGET_INT32) \ + builtin_define ("__INT32__"); \ + } \ +while (0) +#endif + +/* Force the generation of dwarf .debug_frame sections even if not + compiling -g. This guarantees that we can unwind the stack. */ +#define DWARF2_FRAME_INFO 1 + +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Generate .file/.loc directives, so that the assembler generates the + line table. 
*/ +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#define CR16_TARGET_DATA_NEAR cr16_is_data_model (DM_NEAR) +#define CR16_TARGET_DATA_MEDIUM cr16_is_data_model (DM_DEFAULT) +#define CR16_TARGET_DATA_FAR cr16_is_data_model (DM_FAR) + +/* Storage layout. */ +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 2 + +/* Units per 32-bit (DWORD). */ +#define CR16_UNITS_PER_DWORD 4 + +#define POINTER_SIZE 32 + +#define PARM_BOUNDARY 16 + +#define STACK_BOUNDARY (MAX (BIGGEST_ALIGNMENT, PARM_BOUNDARY)) + +#define FUNCTION_BOUNDARY BIGGEST_ALIGNMENT + +/* Biggest alignment on CR16C+ is 32-bit as internal bus is AMBA based + where as CR16C is proprietary internal bus architecture. */ +#define BIGGEST_ALIGNMENT ((TARGET_CR16CP) ? 32 : 16) + +#define MAX_FIXED_MODE_SIZE 64 + +/* In CR16 arrays of chars are word-aligned, so strcpy () will be faster. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (((TREE_CODE (TYPE) == ARRAY_TYPE) \ + && (TYPE_MODE (TREE_TYPE (TYPE)) == QImode) \ + && ((ALIGN) < BITS_PER_WORD)) \ + ? (BITS_PER_WORD) : (ALIGN)) + +/* In CR16 strings are word-aligne; strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(CONSTANT, ALIGN) \ + (((TREE_CODE (CONSTANT) == STRING_CST) && ((ALIGN) < BITS_PER_WORD)) \ + ? (BITS_PER_WORD) : (ALIGN)) + +#define STRICT_ALIGNMENT 0 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Layout of source language data types. */ +#define INT_TYPE_SIZE (TARGET_INT32 ? 32 : 16) + +#define SHORT_TYPE_SIZE 16 + +#define LONG_TYPE_SIZE 32 + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE "long unsigned int" + +#define PTRDIFF_TYPE "long int" + +#define WCHAR_TYPE "short unsigned int" + +#define WCHAR_TYPE_SIZE 16 + +/* By default, the C++ compiler will use the lowest bit of the pointer + to function to indicate a pointer-to-member-function points to a + virtual member function. However, in CR architecture FUNCTION_BOUNDARY + indicates function addresses are always even, but function pointers can be + odd (after right-shifting them when loading them into a register), and the + default doesn't work. In that case, the lowest bit of the delta + field will be used (the remainder of the field is shifted to the left). */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return + values must be in memory. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register usage. */ + +/* First 32-bit register is R12. */ +#define CR16_FIRST_DWORD_REGISTER 12 + +#define FIRST_PSEUDO_REGISTER 16 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + On the CR16, only the stack pointer (r15) is such. */ +#define FIXED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp. */ \ + 0, 0, 0, 0, 1 \ + } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + + On the CR16, calls clobbers r0-r6 (scratch registers), + ra (the return address) and sp (the stack pointer). */ +#define CALL_USED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10. 
*/ \ + 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp. */ \ + 0, 0, 0, 1, 1 \ + } + +/* Returns 1 if the register is longer than word size, 0 otherwise. */ +#define LONG_REG_P(REGNO) \ + (HARD_REGNO_NREGS (REGNO, \ + GET_MODE_WIDER_MODE (smallest_mode_for_size \ + (BITS_PER_WORD, MODE_INT))) == 1) + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((REGNO >= CR16_FIRST_DWORD_REGISTER) \ + ? ((GET_MODE_SIZE (MODE) + CR16_UNITS_PER_DWORD - 1) / CR16_UNITS_PER_DWORD)\ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Nonzero if it is permissible to store a value of mode @var{mode} in hard + register number @var{regno} (or in several registers starting with that + one). On the CR16 architecture, all registers can hold all modes, + except that double precision floats (and double ints) must fall on + even-register boundaries. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) cr16_hard_regno_mode_ok (REGNO, MODE) + +#define NOTICE_UPDATE_CC(EXP, INSN) \ + notice_update_cc ((EXP)) + +/* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be call-clobbered + Check if sizes are same and then check if it is possible to rename. */ +#define HARD_REGNO_RENAME_OK(SRC, DEST) \ + (!cr16_interrupt_function_p () || (df_regs_ever_live_p (DEST))) + +/* Exception handling stuff. */ + +/*To ensure correct dwarf unwinding. */ +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((optimize ("no-gcse","no-dse"))) + +#define gen_rtx_RA gen_rtx_REG (Pmode, RETURN_ADDRESS_REGNUM) + +/* Use (r8,r7) and (r10,r9) to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N*2 + 7) : INVALID_REGNUM) + +#define DWARF2_UNWIND_INFO 1 + +/* (r5,r4) holds a stack adjustment for returning to a handler. */ +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 4) + +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, arg_pointer_rtx, -4)) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_RA + +#define DWARF_FRAME_RETURN_COLUMN \ + DWARF_FRAME_REGNUM (RETURN_ADDRESS_REGNUM) + +#define INCOMING_FRAME_SP_OFFSET 0 +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. */ +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (0 == COUNT) ? gen_rtx_PLUS (Pmode, gen_rtx_RA, gen_rtx_RA) \ + : const0_rtx + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +enum reg_class +{ + NO_REGS, + SHORT_REGS, + LONG_REGS, + NOSP_REGS, + DOUBLE_BASE_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ + { \ + "NO_REGS", \ + "SHORT_REGS", \ + "LONG_REGS", \ + "NOSP_REGS", \ + "DOUBLE_BASE_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" \ + } + +#define REG_CLASS_CONTENTS \ + { \ + {0x00000000}, /* NO_REGS */ \ + {0x00000FFF}, /* SHORT_REGS : 0 - 11 */ \ + {0x0000F000}, /* LONG_REGS : 12 - 15 */ \ + {0x00007FFF}, /* NOSP_REGS : 0 - 14 */ \ + {0x0000F555}, /* DOUBLE_BASE_REGS : 2,4,6,8,10 */ \ + {0x0000FFFF}, /* GENERAL_REGS : 0 - 15 */ \ + {0x0000FFFF} /* ALL_REGS : 0 - 15 */ \ + } + +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +#define REGNO_REG_CLASS(REGNO) cr16_regno_reg_class (REGNO) + +#define BASE_REG_CLASS GENERAL_REGS + +#define MODE_BASE_REG_CLASS(MODE) \ + (GET_MODE_SIZE (MODE) <= 4 ? 
(BASE_REG_CLASS) : (DOUBLE_BASE_REGS)) + +#define INDEX_REG_CLASS LONG_REGS + +#define CR16_REGNO_OK_FOR_BASE_P(REGNO) \ + (((REGNO) < FIRST_PSEUDO_REGISTER) \ + || (reg_renumber && ((unsigned) reg_renumber[REGNO] \ + < FIRST_PSEUDO_REGISTER))) + +/* Use even-numbered reg for 64-bit accesses. */ +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (CR16_REGNO_OK_FOR_BASE_P(REGNO) && \ + ((GET_MODE_SIZE (MODE) > 4 && \ + (REGNO) < CR16_FIRST_DWORD_REGISTER) \ + ? (0 == ((REGNO) & 1)) \ + : 1)) + +/* TODO: For now lets not support index addressing mode. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (((REGNO >= CR16_FIRST_DWORD_REGISTER) \ + && ((REGNO) < FIRST_PSEUDO_REGISTER)) \ + || (reg_renumber \ + && (((unsigned) reg_renumber[REGNO] >= CR16_FIRST_DWORD_REGISTER) \ + && ((unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER))) \ + ) + +#define PREFERRED_RELOAD_CLASS(X, CLASS) CLASS + +/* The maximum number of consecutive registers of class CLASS needed to + hold a value of mode MODE. + On the CompactRISC architecture, the size of MODE in words. + The size of MODE in double words for the class LONG_REGS. + + The following check assumes if the class is not LONG_REGS, then + all (NO_REGS, SHORT_REGS, NOSP_REGS and GENERAL_REGS) other classes are + short. We may have to check if this can cause any degradation in + performance. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (CLASS == LONG_REGS \ + ? (GET_MODE_SIZE (MODE) + CR16_UNITS_PER_DWORD - 1) / CR16_UNITS_PER_DWORD\ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Macros to check the range of integers . These macros were used across + the port, majorly in constraints.md, predicates.md files. */ +#define SIGNED_INT_FITS_N_BITS(imm, N) \ + ((((imm) < ((HOST_WIDE_INT) 1 << ((N) - 1))) \ + && ((imm) >= -((HOST_WIDE_INT) 1 << ((N) - 1)))) ? 1 : 0) + +#define UNSIGNED_INT_FITS_N_BITS(imm, N) \ + (((imm) < ((HOST_WIDE_INT) 1 << (N)) && (imm) >= (HOST_WIDE_INT) 0) ? 1 : 0) + +#define IN_RANGE_P(VALUE, LOW, HIGH) \ + ((((HOST_WIDE_INT)(VALUE)) >= (HOST_WIDE_INT)(LOW)) \ + && (((HOST_WIDE_INT)(VALUE)) <= ((HOST_WIDE_INT)(HIGH)))) + +#define IN_RAN(VALUE, LOW, HIGH) \ + (((((HOST_WIDE_INT)(VALUE)) >= (HOST_WIDE_INT)(LOW)) \ + && (((HOST_WIDE_INT)(VALUE)) <= ((HOST_WIDE_INT)(HIGH)))) ? 1 : 0) + +/* This check is for sbit/cbit instruction. */ +#define OK_FOR_Z(OP) \ + ((GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == CONST_INT) \ + || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == REG) \ + || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == PLUS \ + && GET_CODE (XEXP ((XEXP (OP, 0)), 0)) == REG \ + && GET_CODE (XEXP ((XEXP (OP, 0)), 1)) == CONST_INT)) + +/* Stack layout and calling conventions. */ +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET 0 + +#define STACK_POINTER_REGNUM 15 + +#define FRAME_POINTER_REGNUM 13 + +#define ARG_POINTER_REGNUM 12 + +#define STATIC_CHAIN_REGNUM 1 + +#define RETURN_ADDRESS_REGNUM 14 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define ELIMINABLE_REGS \ + { \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + } + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do \ + { \ + (OFFSET) = cr16_initial_elimination_offset ((FROM), (TO)); \ + } \ + while (0) + +/* Passing function arguments. 
*/ + +#define ACCUMULATE_OUTGOING_ARGS 0 + +#define PUSH_ARGS 1 + +#define PUSH_ROUNDING(BYTES) (((BYTES) + 1) & ~1) + +#ifndef CUMULATIVE_ARGS +struct cumulative_args +{ + int ints; + int last_parm_in_reg; +}; + +#define CUMULATIVE_ARGS struct cumulative_args +#endif + +/* On the CR16 architecture, Varargs routines should receive their parameters + on the stack. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + cr16_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME)) + +#define FUNCTION_ARG_REGNO_P(REGNO) cr16_function_arg_regno_p (REGNO) + +/* Generating code for profiling - NOT IMPLEMENTED. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + sorry ("profiler support for CR16"); \ +} + +/* Trampolines for nested functions - NOT SUPPORTED. */ +#define TRAMPOLINE_SIZE 16 + +/* ADDRESSING MODES. */ + +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == LABEL_REF \ + || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST \ + || GET_CODE (X) == CONST_INT) + +#define MAX_REGS_PER_ADDRESS 2 + +#define HAVE_POST_INCREMENT 0 +#define HAVE_POST_DECREMENT 0 +#define HAVE_POST_MODIFY_DISP 0 +#define HAVE_POST_MODIFY_REG 0 + +#ifdef REG_OK_STRICT +#define CR16_REG_OK_FOR_BASE_P(X) CR16_REGNO_OK_FOR_BASE_P (REGNO (X)) +#define REG_MODE_OK_FOR_BASE_P(X, MODE) \ + REGNO_MODE_OK_FOR_BASE_P (REGNO(X), MODE) +#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +#else /* not REG_OK_STRICT. */ +#define CR16_REG_OK_FOR_BASE_P(X) 1 +#define REG_MODE_OK_FOR_BASE_P(X, MODE) 1 +#define REG_OK_FOR_INDEX_P(X) 1 +#endif /* not REG_OK_STRICT. */ + +/* Assume best case (branch predicted). */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +#define SLOW_BYTE_ACCESS 1 + +/* It is as good or better to call a constant function address than to + call an address kept in a register. */ +#define NO_FUNCTION_CSE + +/* Dividing the output into sections. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" + +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +/* Position independent code (PIC). */ +/* NEAR_PIC for -fpic option. */ + +#define NEAR_PIC 1 + +/* FAR_PIC for -fPIC option. */ + +#define FAR_PIC 2 + +#define PIC_OFFSET_TABLE_REGNUM 12 + +#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) + +/* Assembler format. */ + +/* Character to start a comment. */ +#define ASM_COMMENT_START "#" + +#define GLOBAL_ASM_OP "\t.globl\t" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + asm_fprintf (STREAM, "%U%s", (*targetm.strip_name_encoding) (NAME)); + +#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYMBOL) \ + do \ + { \ + const char *rn = XSTR (SYMBOL, 0); \ + assemble_name (STREAM, rn); \ + if (SYMBOL_REF_FUNCTION_P (SYMBOL)) \ + { \ + fprintf ((STREAM), "@c"); \ + } \ + } \ + while (0) + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP "\t.section\t.init" + +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Instruction output. */ + +#define REGISTER_NAMES \ + { \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "ra", "sp" \ + } + +/* Output of dispatch tables. */ + +/* Revisit. 
No PC relative case as label expressions are not + properly supported in binutils else we could have done this: + #define CASE_VECTOR_PC_RELATIVE (optimize_size ? 1 : 0). */ +#define CASE_VECTOR_PC_RELATIVE 0 + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + ((GET_MODE (BODY) == QImode) \ + ? fprintf ((FILE), "\t.byte (.L%d-.L%d) >> 1\n", \ + VALUE, REL) \ + : fprintf ((FILE), "\t.word (.L%d-.L%d) >> 1\n", \ + VALUE, REL)) + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + asm_fprintf ((STREAM), "\t.long\t.L%d@c\n", (VALUE)) + +/* Alignment in assembler file. */ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + asm_fprintf ((STREAM), "\t.align\t%d\n", 1 << (POWER)) + +/* Miscellaneous parameters. */ + +#define CASE_VECTOR_MODE Pmode + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define STORE_FLAG_VALUE 1 + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +/* Define this boolean macro(s) to indicate whether or not your architecture + has (un)conditional branches that can span all of memory. It is used in + conjunction with an optimization that partitions hot and cold basic blocks + into separate sections of the executable. + CR16 contains branch instructions that span whole address space. */ +#define HAS_LONG_COND_BRANCH 1 +#define HAS_LONG_UNCOND_BRANCH 1 + +#endif /* End of GCC_CR16_H. */ diff --git a/gcc-4.9/gcc/config/cr16/cr16.md b/gcc-4.9/gcc/config/cr16/cr16.md new file mode 100644 index 000000000..fb2fc9bc5 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.md @@ -0,0 +1,1084 @@ +;; GCC machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
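[Editor's note] The cr16.h dispatch-table hooks shown just above store each case entry as a label difference shifted right by one, which is consistent with the port's convention that code addresses are even and are handled in halfword units (see the earlier pointer-to-member-function comment). The sketch below is only an illustration of what ASM_OUTPUT_ADDR_DIFF_ELT prints; the emit_case_entry helper and the label numbers are hypothetical and not part of the port.

#include <stdio.h>

/* Hypothetical helper mirroring ASM_OUTPUT_ADDR_DIFF_ELT above:
   emit one dispatch-table entry as (.Lvalue - .Lrel) >> 1, using
   .byte for QImode tables and .word otherwise.  */
static void
emit_case_entry (FILE *file, int qimode_p, int value_label, int rel_label)
{
  if (qimode_p)
    fprintf (file, "\t.byte (.L%d-.L%d) >> 1\n", value_label, rel_label);
  else
    fprintf (file, "\t.word (.L%d-.L%d) >> 1\n", value_label, rel_label);
}

int
main (void)
{
  /* Example: a word-sized table whose entries are offsets from .L10.  */
  emit_case_entry (stdout, 0, 42, 10);
  emit_case_entry (stdout, 0, 43, 10);
  return 0;
}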
+ +;; Register numbers +(define_constants + [(SP_REGNUM 15); Stack pointer + (RA_REGNUM 14); Return address + ] +) + +;; Predicates & Constraints +(include "predicates.md") +(include "constraints.md") + +;; UNSPEC usage +(define_constants + [(UNSPEC_PIC_ADDR 0) + (UNSPEC_PIC_LOAD_ADDR 1) + (UNSPEC_LIBRARY_OFFSET 2) + (UNSPEC_SH_LIB_PUSH_R12 3) + (UNSPEC_SH_LIB_POP_R12 4) + (UNSPEC_RETURN_ADDR 5) + ] +) + +;; Attributes +(define_attr "length" "" (const_int 2)) + +(define_asm_attributes + [(set_attr "length" "2")] +) + +;; Mode Macro Definitions +(define_mode_iterator CR16IM [QI HI SI]) +(define_mode_iterator LONG [SI SF]) +(define_mode_iterator ALLMTD [QI HI SI SF DI DF]) +(define_mode_iterator DOUBLE [DI DF]) +(define_mode_iterator SHORT [QI HI]) +(define_mode_attr tIsa [(QI "b") (HI "w") (SI "d") (SF "d")]) +(define_mode_attr lImmArith [(QI "4") (HI "4") (SI "6") (SF "6")]) +(define_mode_attr lImmArithD [(QI "4") (HI "4") (SI "6") (SF "6") (DI "12") (DF "12")]) +(define_mode_attr iF [(QI "i") (HI "i") (SI "i") (SF "F")]) +(define_mode_attr iFD [(DI "i") (DF "F")]) +(define_mode_attr LL [(QI "L") (HI "L")]) +(define_mode_attr shImmBits [(QI "3") (HI "4") (SI "5")]) + +; In QI mode we push 2 bytes instead of 1 byte. +(define_mode_attr pushCnstr [(QI "X") (HI "<") (SI "<") (SF "<") (DI "<") (DF "<")]) + +; tpush will be used to generate the 'number of registers to push' in the +; push instruction. +(define_mode_attr tpush [(QI "1") (HI "1") (SI "2") (SF "2") (DI "4") (DF "4")]) + +;; Code Macro Definitions +(define_code_attr sIsa [(sign_extend "") (zero_extend "u")]) +(define_code_attr sPat [(sign_extend "s") (zero_extend "u")]) +(define_code_attr szPat [(sign_extend "") (zero_extend "zero_")]) +(define_code_attr szIsa [(sign_extend "x") (zero_extend "z")]) + +(define_code_iterator sz_xtnd [ sign_extend zero_extend]) +(define_code_iterator any_cond [eq ne gt gtu lt ltu ge geu le leu]) +(define_code_iterator plusminus [plus minus]) + +(define_code_attr plusminus_insn [(plus "add") (minus "sub")]) +(define_code_attr plusminus_flag [(plus "PLUS") (minus "MINUS")]) +(define_code_attr comm [(plus "%") (minus "")]) + +(define_code_iterator any_logic [and ior xor]) +(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) +(define_code_attr any_logic_insn [(and "and") (ior "ior") (xor "xor")]) +(define_code_attr any_logic_flag [(and "AND") (ior "IOR") (xor "XOR")]) + +(define_mode_iterator QH [QI HI]) +(define_mode_attr qh [(QI "qi") (HI "hi")]) +(define_mode_attr QHsz [(QI "2,2,2") (HI "2,2,4")]) +(define_mode_attr QHsuffix [(QI "b") (HI "w")]) + + +;; Function Prologue and Epilogue +(define_expand "prologue" + [(const_int 0)] + "" + { + cr16_expand_prologue (); + DONE; + } +) + +(define_insn "push_for_prologue" + [(set (reg:SI SP_REGNUM) + (minus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i")))] + "reload_completed" + { + return cr16_prepare_push_pop_string (0); + } + [(set_attr "length" "4")] +) + +(define_expand "epilogue" + [(return)] + "" + { + cr16_expand_epilogue (); + DONE; + } +) + +(define_insn "pop_and_popret_return" + [(set (reg:SI SP_REGNUM) + (plus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i"))) + (use (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + { + return cr16_prepare_push_pop_string (1); + } + [(set_attr "length" "4")] +) + +(define_insn "popret_RA_return" + [(use (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + "popret\tra" + [(set_attr "length" "2")] +) + +;; Arithmetic Instruction Patterns + +;; 
Addition-Subtraction "adddi3/subdi3" insns. +(define_insn "di3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plusminus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "r")))] + "" + { + return cr16_emit_add_sub_di (operands, ); + }) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,M,N,O,i")))] + "" + "addd\t%2, %0" + [(set_attr "length" "2,2,4,4,6")] +) + +;; Addition-Subtraction "addhi3/subhi3" insns. +(define_insn "hi3" + [(set (match_operand:HI 0 "register_operand" "=c,c,c") + (plusminus:HI (match_operand:HI 1 "register_operand" "0,0,0") + (match_operand:HI 2 "reg_hi_int_operand" "c,M,N")))] + "" + "w\t%2, %0" + [(set_attr "length" "2,2,4")] +) + +;; Addition-Subtraction "addqi3/subqi3" insns. +(define_insn "qi3" + [(set (match_operand:QI 0 "register_operand" "=c,c") + (plusminus:QI (match_operand:QI 1 "register_operand" "0,0") + (match_operand:QI 2 "reg_qi_int_operand" "c,M")))] + "" + "b\t%2, %0" + [(set_attr "length" "2,2")] +) + +;; Subtract Instruction +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,i")))] + "" + "subd\t%2, %0" + [(set_attr "length" "4,6")] +) + +;; Multiply and Accumulate Instructions "smachisi3/umachisi3" +(define_insn "maddhisi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r")) + (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r"))) + (match_operand:SI 3 "register_operand" "0")))] + "TARGET_MAC" + "macw\t%1, %2, %0" + [(set_attr "length" "2")] +) + +;; Multiply Instructions +(define_insn "mulhi3" + [(set (match_operand:HI 0 "register_operand" "=c,c,c") + (mult:HI (match_operand:HI 1 "register_operand" "%0,0,0") + (match_operand:HI 2 "reg_or_int_operand" "c,M,N")))] + "" + "mulw\t%2, %0" + [(set_attr "length" "2,2,4")] +) + +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=c") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "%0")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "c"))))] + "" + "mulsb\t%2, %0" + [(set_attr "length" "2")] +) + +;; Bit Set/Clear Instructions +(define_expand "insv" + [(set (zero_extract (match_operand 0 "memory_operand" "") + (match_operand 1 "immediate_operand" "") + (match_operand 2 "immediate_operand" "")) + (match_operand 3 "immediate_operand" ""))] + "TARGET_BIT_OPS" + { + if (INTVAL (operands[1]) != 1) + FAIL; + if (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 15) + FAIL; + if (INTVAL (operands[3]) == 1) + { + if (GET_MODE (operands[0]) == QImode) + { + emit_insn (gen_set_bitqi (operands[0], operands[2])); + DONE; + } + else if (GET_MODE (operands[0]) == HImode) + { + emit_insn (gen_set_bithi (operands[0], operands[2])); + DONE; + } + } + if (INTVAL (operands[3]) == 0) + { + if (GET_MODE (operands[0]) == QImode) + { + emit_insn (gen_clr_bitqi (operands[0], operands[2])); + DONE; + } + else if (GET_MODE (operands[0]) == HImode) + { + emit_insn (gen_clr_bithi (operands[0], operands[2])); + DONE; + } + } + } +) + +(define_insn "set_bit" + [(set (zero_extract:SHORT (match_operand:SHORT 0 "memory_operand" "+m") + (const_int 1) + (match_operand 1 "immediate_operand" "i")) + (const_int 1))] + "TARGET_BIT_OPS" + "sbit\t%1,%0" + [(set_attr "length" "2")] +) + 
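[Editor's note] The insv expander and the sbit/cbit patterns in this area handle single-bit updates of QImode/HImode memory operands when TARGET_BIT_OPS (-mbit-ops, per cr16.opt later in this patch) is enabled. The C fragment below is only a sketch of the kind of source-level store these patterns are intended to match; the variable and function names are made up, and whether a given store is actually combined into one sbit/cbit depends on the operand predicates (bit_operand, one_bit_operand, rev_one_bit_operand defined later in predicates.md).

#include <stdint.h>

/* Hypothetical memory-mapped flag word, purely for illustration.  */
volatile uint16_t status_flags;

void
set_ready_bit (void)
{
  /* OR with a single-bit mask: shape matched by the sbit patterns.  */
  status_flags |= (uint16_t) (1u << 3);
}

void
clear_ready_bit (void)
{
  /* AND with an all-ones-but-one mask: shape matched by the cbit patterns.  */
  status_flags &= (uint16_t) ~(1u << 3);
}

/* Same single-bit test the one_bit_operand predicate uses:
   a nonzero value with exactly one bit set satisfies
   (val & (val - 1)) == 0.  */
int
has_single_bit (unsigned int val)
{
  return val != 0 && (val & (val - 1)) == 0;
}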
+(define_insn "clr_bit" + [(set (zero_extract:SHORT (match_operand:SHORT 0 "memory_operand" "+m") + (const_int 1) + (match_operand 1 "immediate_operand" "i")) + (const_int 0))] + "TARGET_BIT_OPS" + "cbit\t%1,%0" + [(set_attr "length" "2")] +) + +(define_insn "set_bit_mem" + [(set (match_operand:SHORT 0 "bit_operand" "=m") + (ior:SHORT (match_dup 0) + (match_operand:SHORT 1 "one_bit_operand" "i")) + )] + "TARGET_BIT_OPS" + "sbit\t$%s1,%0" + [(set_attr "length" "2")] +) + +(define_insn "clear_bit_mem" + [(set (match_operand:SHORT 0 "bit_operand" "=m") + (and:SHORT (match_dup 0) + (match_operand:SHORT 1 "rev_one_bit_operand" "i")) + )] + "TARGET_BIT_OPS" + "cbit\t$%r1,%0" + [(set_attr "length" "2")] +) + +;; Logical Instructions - and/ior/xor "anddi3/iordi3/xordi3" +(define_insn "di3" + [(set (match_operand:DI 0 "register_operand" "=r") + (any_logic:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "register_operand" "r")))] + "" + { + return cr16_emit_logical_di (operands, ); + }) + +; Logical and/ior/xor "andsi3/iorsi3/xorsi3" +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (any_logic:SI (match_operand:SI 1 "register_operand" "%0,0,0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,M,N,i")))] + "" + "d\t%2, %0" + [(set_attr "length" "2,2,4,6")] +) + +; Logical and/ior/xor in HImode "andhi3/iorhi3/xorhi3" +; Logical and/ior/xor in QImode "andqi3/iorqi3/xorqi3" +(define_insn "3" + [(set (match_operand:QH 0 "register_operand" "=c,c,c") + (any_logic:QH (match_operand:QH 1 "register_operand" "%0,0,0") + (match_operand:QH 2 "reg_hi_int_operand" "c,M,N")))] + "" + "\t%2, %0" + [(set_attr "length" "")] +) + +;; Sign and Zero Extend Instructions +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "movw\t%1, %0" + [(set_attr "length" "4")] +) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sz_xtnd:HI (match_operand:QI 1 "register_operand" "r")))] + "" + "movb\t%1, %0" + [(set_attr "length" "4")] +) + +;; One's Complement +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "0")))] + "" + { + rtx xoperand ; + int reg0 = REGNO (operands[0]); + + xoperand = gen_rtx_REG (SImode, reg0 + 2); + output_asm_insn ("xord\t$-1, %0", operands); + output_asm_insn ("xord\t$-1, %0", &xoperand); + return "" ; + } + [(set_attr "length" "12")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:CR16IM 0 "register_operand" "=r") + (not:CR16IM (match_operand:CR16IM 1 "register_operand" "0")))] + "" + "xor\t$-1, %0" + [(set_attr "length" "2")] +) + +;; Arithmetic Left and Right Shift Instructions +(define_insn "ashlqi3" + [(set (match_operand:QI 0 "register_operand" "=c,c") + (ashift:QI (match_operand:QI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "c,I")))] + "" + "ashub\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_insn "ashlhi3" + [(set (match_operand:HI 0 "register_operand" "=c,c") + (ashift:HI (match_operand:HI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "c,J")))] + "" + "ashuw\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "r,K")))] + "" + "ashud\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_expand "ashr3" + [(set 
(match_operand:CR16IM 0 "register_operand" "") + (ashiftrt:CR16IM (match_operand:CR16IM 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + { + /* If the constant is not in range, try placing it in a reg */ + if (!UNSIGNED_INT_FITS_N_BITS(INTVAL (operands[2]),)) + operands[2] = copy_to_mode_reg(QImode, operands[2]); + } + + if (GET_CODE (operands[2]) != CONST_INT) + operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2])); + } +) + +(define_insn "ashrqi3_imm_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (ashiftrt:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "shift_qi_imm_operand" "i")))] + "" + "ashub\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrhi3_imm_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "shift_hi_imm_operand" "i")))] + "" + "ashuw\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrsi3_imm_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "shift_si_imm_operand" "i")))] + "" + "ashud\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrqi3_neg_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (ashiftrt:QI (match_operand:QI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "ashub\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "ashrhi3_neg_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "ashuw\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "ashrdi3_neg_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "r"))))] + "" + "ashud\t%2,%0" + [(set_attr "length" "2")] +) + +(define_expand "lshr3" + [(set (match_operand:CR16IM 0 "register_operand" "") + (lshiftrt:CR16IM (match_operand:CR16IM 1 "register_operand" "") + (match_operand:QI 2 "reg_or_int_operand" "")))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + { + /* If the constant is not in range, try placing it in a reg */ + if (!UNSIGNED_INT_FITS_N_BITS(INTVAL (operands[2]),)) + operands[2] = copy_to_mode_reg(QImode, operands[2]); + } + + if (GET_CODE (operands[2]) != CONST_INT) + operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2])); + } +) + +(define_insn "lshrqi3_imm_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "shift_qi_operand" "Q")))] + "" + "lshb\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrhi3_imm_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "shift_hi_operand" "R")))] + "" + "lshw\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrsi3_imm_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "shift_si_operand" "S")))] + "" + "lshd\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrqi3_neg_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 
"register_operand" "c"))))] + "" + "lshb\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "lshrhi3_neg_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "lshw\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "lshrsi3_neg_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "r"))))] + "" + "lshd\t%2,%0" + [(set_attr "length" "2")] +) + +;; Move Instructions + +;; Move any non-immediate operand 0 to a general operand 1. +;; This applies only before starting the reload process +;; Operand 0 is not a register operand of type mode MODE +;; If Operand 0 is a push operand of type mode MODE +;; then, if Operand 1 is a non-SP register +;; then, Operand 1 = copy_to_mode_reg (mode, Operand 1) +;; endif +;; else +;; if Operand 1 is either register or 4-bit immediate constant +;; then, Operand 1 = copy_to_mode_reg (mode, Operand 1) +;; endif +;; endif +;; +;; What does copy_to_mode_reg (mode, rtx val) do? +;; Copy the value into new temp reg and return the reg where the +;; mode of the new reg is always mode MODE when value is constant +;; +;; Why should copy_to_mode_reg be called? +;; All sorts of move are nor supported by CR16. Therefore, +;; when unsupported move is encountered, the additional instructions +;; will be introduced for the purpose. +;; +;; A new move insn is inserted for Op 1 when one of the following +;; conditions is met. +;; Case 1: Op 0 is push_operand +;; Op 1 is SP register +;; +;; Case 2: Op 0 is not push_operand +;; Op 1 is neither register nor unsigned 4-bit immediate + +(define_expand "mov" + [(set (match_operand:ALLMTD 0 "nonimmediate_operand" "") + (match_operand:ALLMTD 1 "general_operand" ""))] + "" + { + if (!(reload_in_progress || reload_completed)) + { + /* Only if Op0 is a register operand. */ + if (!register_operand (operands[0], mode)) + { + if (push_operand (operands[0], mode)) + { + /* Use copy_to_mode_reg only if the register needs + to be pushed is SP as CR16 does not support pushing SP. */ + if (!nosp_reg_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + else + { + /* Use copy_to_mode_reg if op1 is not register operand + subject to conditions inside. */ + if (!register_operand (operands[1], mode)) + { + /* CR16 does not support moving immediate to SI or SF + type memory. */ + if (mode == SImode || mode == SFmode || + mode == DImode || mode == DFmode) + operands[1] = copy_to_mode_reg (mode, operands[1]); + else + /* moving imm4 is supported by CR16 instruction. */ + if (!u4bits_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + } + } + + /* If operand-1 is a symbol, convert it into a BRO or GOT Format. */ + if (flag_pic && ! 
legitimate_pic_operand_p (operands[1])) + { + operands[1] = legitimize_pic_address (operands[1], mode, 0); + } + } + } +) + +; ALLMT : QI,HI,SI,SF +; pushCnstr : Push constraints +; QI : X +; HI,SI,SF,DI,DF : < +; b : All non-sp registers +; tpush : Push count +; QI,HI : 1 +; SI,SF : 2 +; DI,DF : 4 +(define_insn "push_internal" + [(set (match_operand:ALLMTD 0 "push_operand" "=") + (match_operand:ALLMTD 1 "nosp_reg_operand" "b"))] + "" + "push\t$,%p1" + [(set_attr "length" "2")] +) + +; (DI, DF) move +(define_insn "*mov_double" + [(set (match_operand:DOUBLE 0 "nonimmediate_operand" "=r, r, r, m") + (match_operand:DOUBLE 1 "general_operand" "r, , m, r"))] + "register_operand (operands[0], DImode) + || register_operand (operands[0], DFmode) + || register_operand (operands[1], DImode) + || register_operand (operands[1], DFmode)" + { + if (0 == which_alternative) { + rtx xoperands[2] ; + int reg0 = REGNO (operands[0]); + int reg1 = REGNO (operands[1]); + + xoperands[0] = gen_rtx_REG (SImode, reg0 + 2); + xoperands[1] = gen_rtx_REG (SImode, reg1 + 2); + if ((reg1 + 2) != reg0) + { + output_asm_insn ("movd\t%1, %0", operands); + output_asm_insn ("movd\t%1, %0", xoperands); + } + else + { + output_asm_insn ("movd\t%1, %0", xoperands); + output_asm_insn ("movd\t%1, %0", operands); + }} + + else if (1 == which_alternative) { + rtx lo_operands[2] ; + rtx hi_operands[2] ; + + lo_operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + hi_operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 2); + lo_operands[1] = simplify_gen_subreg (SImode, operands[1], + VOIDmode == GET_MODE (operands[1]) + ? DImode : GET_MODE (operands[1]), 0); + hi_operands[1] = simplify_gen_subreg (SImode, operands[1], + VOIDmode == GET_MODE (operands[1]) + ? DImode : GET_MODE (operands[1]), 4); + output_asm_insn ("movd\t%1, %0", lo_operands); + output_asm_insn ("movd\t%1, %0", hi_operands);} + + else if (2 == which_alternative) { + rtx xoperands[2] ; + int reg0 = REGNO (operands[0]), reg1 = -2 ; + rtx addr ; + + if (MEM_P (operands[1])) + addr = XEXP (operands[1], 0); + else + addr = NULL_RTX ; + switch (GET_CODE (addr)) + { + case REG: + case SUBREG: + reg1 = REGNO (addr); + break ; + case PLUS: + switch (GET_CODE (XEXP (addr, 0))) { + case REG: + case SUBREG: + reg1 = REGNO (XEXP (addr, 0)); + break ; + case PLUS: + reg1 = REGNO (XEXP (XEXP (addr, 0), 0)); + break ; + default: + inform (DECL_SOURCE_LOCATION (cfun->decl), "unexpected expression; addr:"); + debug_rtx (addr); + inform (DECL_SOURCE_LOCATION (cfun->decl), "operands[1]:"); + debug_rtx (operands[1]); + inform (DECL_SOURCE_LOCATION (cfun->decl), "generated code might now work\n"); + break ;} + break ; + default: + break ; + } + + xoperands[0] = gen_rtx_REG (SImode, reg0 + 2); + xoperands[1] = offset_address (operands[1], GEN_INT (4), 2); + gcc_assert ((reg0 + 1) != reg1); + if (reg0 != reg1 && (reg1 + 1) != reg0) + { + output_asm_insn ("loadd\t%1, %0", operands); + output_asm_insn ("loadd\t%1, %0", xoperands); + } + else + { + output_asm_insn ("loadd\t%1, %0", xoperands); + output_asm_insn ("loadd\t%1, %0", operands); + }} + else + { + rtx xoperands[2] ; + xoperands[0] = offset_address (operands[0], GEN_INT (4), 2); + xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 2); + output_asm_insn ("stord\t%1, %0", operands); + output_asm_insn ("stord\t%1, %0", xoperands); + } + return "" ; + } + [(set_attr "length" "4, , , ")] +) + +; All long (SI, SF) register move, load and store operations +; The print_operand will take care of printing the register pair +; 
when mode is SI/SF and register is in SHORT_REGS +(define_insn "*mov_long" + [(set (match_operand:LONG 0 "nonimmediate_operand" "=r, r, r, m") + (match_operand:LONG 1 "general_operand" "r, , m, r"))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + mov\t%1, %0 + mov\t%1, %0 + load\t%1, %0 + stor\t%1, %0" + [(set_attr "length" "2,,,")] +) + +;; All short (QI, HI) register move, load and store operations +(define_insn "*mov_short" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r, r, r, m, m") + (match_operand:SHORT 1 "general_operand" "r, , m, r, "))] + "(register_operand (operands[0], mode)) + || (store_operand (operands[0], mode) + && (register_operand (operands[1], mode) + || u4bits_operand (operands[1], mode)))" + "@ + mov\t%1, %0 + mov\t%1, %0 + load\t%1, %0 + stor\t%1, %0 + stor\t%1, %0" + [(set_attr "length" "2,,,,")] +) + +;; Compare Instructions +; Instruction generated compares the operands in reverse order +; Therefore, while printing the asm, the reverse of the +; compare condition shall be printed. +(define_insn "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:CR16IM 1 "register_operand" "r,r") + (match_operand:CR16IM 2 "nonmemory_operand" "r,n")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (cc0))] + "" + "cmp\t%2, %1\;b%d0\t%l3" + [(set_attr "length" "6,6")] +) + +(define_expand "cmp" + [(parallel [(set (cc0) + (compare (match_operand:CR16IM 0 "register_operand" "") + (match_operand:CR16IM 1 "nonmemory_operand" ""))) + (clobber (match_scratch:HI 2 "=r"))] ) ] + "" + "") + +;; Scond Instructions +(define_expand "cstore4" + [(set (cc0) + (compare (match_operand:CR16IM 2 "register_operand" "") + (match_operand:CR16IM 3 "nonmemory_operand" ""))) + (set (match_operand:HI 0 "register_operand") + (match_operator:HI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "" +) + +(define_insn "*cmp_insn" + [(set (cc0) + (compare (match_operand:CR16IM 0 "register_operand" "r,r") + (match_operand:CR16IM 1 "nonmemory_operand" "r,n")))] + "" + "cmp\t%1, %0" + [(set_attr "length" "2,4")] +) + +(define_insn "sCOND_internal" + [(set (match_operand:HI 0 "register_operand" "=r") + (match_operator:HI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "s%d1\t%0" + [(set_attr "length" "2")] +) + +;; Jumps and Branches +(define_insn "indirect_jump_return" + [(set (pc) + (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + "jump\t (ra)" + [(set_attr "length" "2")] +) + +(define_insn "jump_return" + [(unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR) + (return)] + "reload_completed" + "jump\t(ra)" + [(set_attr "length" "2")] +) + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "reg_or_sym_operand" "r,i"))] + "" + "@ + jump\t%0 + br\t%a0" + [(set_attr "length" "2,6")] +) + +(define_insn "interrupt_return" + [(unspec_volatile [(const_int 0)] 0) + (return)] + "" + { + return cr16_prepare_push_pop_string (1); + } + [(set_attr "length" "14")] +) + +(define_insn "jump_to_imm" + [(set (pc) + (match_operand 0 "jump_imm_operand" "i"))] + "" + "br\t%c0" + [(set_attr "length" "6")] +) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "br\t%l0" + [(set_attr "length" "6")] +) + +;; Table Jump +(define_insn "tablejump" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref:SI (match_operand 1 "" "")))] + "!flag_pic" + "jump\t%0" + [(set_attr "length" "2")] +) + +;; Call Instructions +(define_expand 
"call" + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" ""))] + "" + { + if (flag_pic && ! legitimate_pic_operand_p (operands[0])) + { + operands[0] = gen_const_mem (QImode, + legitimize_pic_address (XEXP (operands[0], 0), Pmode, 0)); + emit_call_insn (gen_cr16_call (operands[0], operands[1])); + } + else + emit_call_insn (gen_cr16_call (operands[0], operands[1])); + DONE; + } +) + +(define_expand "cr16_call" + [(parallel + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "cr16_call_insn_branch_pic" + [(call (mem:QI (match_operand:SI 0 "call_imm_operand" "i")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "flag_pic == FAR_PIC" + { + if (GET_CODE (operands[0]) != CONST_INT) + return "loadd\t%g0, %2 \n\tjal %2"; + else + return "jal %2"; + } + [(set_attr "length" "8")] +) + +(define_insn "cr16_call_insn_branch" + [(call (mem:QI (match_operand:SI 0 "call_imm_operand" "i")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "flag_pic == 0 || flag_pic == NEAR_PIC" + { + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. + */ + if (GET_CODE (operands[0]) != CONST_INT) + return "bal (ra), %a0"; + else + operands[4] = GEN_INT ((INTVAL (operands[0]))>>1); + return "movd\t%g4,\t(r1,r0)\n\tjal\t(r1,r0)"; + } + [(set_attr "length" "6")] +) + +(define_insn "cr16_call_insn_jump" + [(call (mem:QI (match_operand:SI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "" + "jal\t%0" + [(set_attr "length" "2")] +) + +;; Call Value Instructions + +(define_expand "call_value" + [(set (match_operand 0 "general_operand" "") + (call (match_operand:QI 1 "memory_operand" "") + (match_operand 2 "" "")))] + "" + { + if (flag_pic && !legitimate_pic_operand_p (operands[1])) + { + operands[1] = gen_const_mem (QImode, + legitimize_pic_address (XEXP (operands[1], 0), Pmode, 0)); + emit_call_insn (gen_cr16_call_value (operands[0], operands[1], operands[2])); + } + else + emit_call_insn (gen_cr16_call_value (operands[0], operands[1], operands[2])); + DONE; + } +) + +(define_expand "cr16_call_value" + [(parallel + [(set (match_operand 0 "general_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "cr16_call_value_insn_branch_pic" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "call_imm_operand" "i")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "flag_pic == FAR_PIC" + { + if (GET_CODE (operands[1]) != CONST_INT) + return "loadd\t%g1, %3 \n\tjal %3"; + else + return "jal %3"; + } + [(set_attr "length" "8")] +) + +(define_insn "cr16_call_value_insn_branch" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "call_imm_operand" "i")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "flag_pic == 0 || flag_pic == NEAR_PIC" + { + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. 
+ */ + if (GET_CODE (operands[1]) != CONST_INT) + return "bal (ra), %a1"; + else + { + operands[4] = GEN_INT ((INTVAL (operands[1]))>>1); + return "movd\t%g4,\t(r1,r0)\n\tjal\t(r1,r0)"; + } + } + [(set_attr "length" "6")] +) + + +(define_insn "cr16_call_value_insn_jump" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "" + "jal\t%1" + [(set_attr "length" "2")] +) + + +;; Nop +(define_insn "nop" + [(const_int 0)] + "" + "nop\t" +) + +;; PIC +/* When generating pic, we need to load the symbol offset into a register. + So that the optimizer does not confuse this with a normal symbol load + we use an unspec. The offset will be loaded from a constant pool entry, + since that is the only type of relocation we can use. */ + +(define_insn "unspec_bro_addr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_PIC_ADDR))] + "" + "movd \t%f1, %0" + [(set_attr "length" "4")] +) + +(define_insn "unspec_got_addr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_PIC_LOAD_ADDR))] + "" + "loadd \t%g1, %0" + [(set_attr "length" "6")] +) diff --git a/gcc-4.9/gcc/config/cr16/cr16.opt b/gcc-4.9/gcc/config/cr16/cr16.opt new file mode 100644 index 000000000..e4433cb5d --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.opt @@ -0,0 +1,51 @@ +; Options for the National Semiconductor CR16 port of the compiler. +; Copyright (C) 2012-2014 Free Software Foundation, Inc. +; Contributed by KPIT Cummins Infosystems Limited. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published +; by the Free Software Foundation; either version 3, or (at your +; option) any later version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +msim +Target +-msim Use simulator runtime + +mbit-ops +Target Report Mask(BIT_OPS) +Generate SBIT, CBIT instructions + +mmac +Target Report Mask(MAC) +Support multiply accumulate instructions + +mdebug-addr +Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented + +mdata-model= +Target RejectNegative JoinedOrMissing Var(cr16_data_model) +Treat data references as near, far or medium. medium is default + +mcr16c +Target RejectNegative Mask(CR16C) +Generate code for CR16C architecture + +mcr16cplus +Target RejectNegative InverseMask(CR16C,CR16CP) +Generate code for CR16C+ architecture (Default) + +mint32 +Target RejectNegative Mask(INT32) +Treat integers as 32-bit. + diff --git a/gcc-4.9/gcc/config/cr16/predicates.md b/gcc-4.9/gcc/config/cr16/predicates.md new file mode 100644 index 000000000..d998df9da --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/predicates.md @@ -0,0 +1,225 @@ +;; Predicates of machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Predicates + +;; Predicates for sbit/cbit instructions +;; bit operand used for the generation of bit insn generation +(define_predicate "bit_operand" + (match_code "mem") +{ + return ((GET_CODE (op) == MEM && OK_FOR_Z (op))); +}) + +;; Unsigned 4-bits constant int or double value. +(define_predicate "u4bits_operand" + (match_code "const_int,const_double") +{ + if (GET_CODE (op) == CONST_DOUBLE) + return cr16_const_double_ok (op); + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 4)) ? 1 : 0; +}) + +;; Operand is a constant integer where +;; only one bit is set to 1. +(define_predicate "one_bit_operand" + (match_code "const_int") +{ + unsigned int val; + + val = INTVAL (op); + if (mode == QImode) + val &= 0xff; + else if (mode == HImode) + val &= 0xffff; + else + gcc_unreachable(); + + if (val != 0) + return (val & (val - 1)) == 0; /* true if only one bit is set. */ + else + return 0; +}) + +;; Operand is a constant integer where +;; only one bit is set to 0. +(define_predicate "rev_one_bit_operand" + (match_code "const_int") +{ + unsigned int val; + + val = ~INTVAL (op); /* Invert and use. */ + if (mode == QImode) + val &= 0xff; + else if (mode == HImode) + val &= 0xffff; + else + gcc_unreachable(); + + if (val != 0) + return (val & (val - 1)) == 0; /* true if only one bit is set. */ + else + return 0; +}) + +;; Predicates for shift instructions +;; Immediate operand predicate for count in shift operations. +;; Immediate shall be 3-bits in case operand to be operated on +;; is a qi mode operand. +(define_predicate "shift_qi_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 3)) ? 1 : 0; +}) + +;; Immediate shall be 4-bits in case operand to be operated on +;; is a hi mode operand. +(define_predicate "shift_hi_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 4)) ? 1 : 0; +}) + +;; Immediate shall be 3-bits in case operand to be operated on +;; is a si mode operand. +(define_predicate "shift_si_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 5)) ? 1 : 0; +}) + +;; Predicates for jump/call instructions +;; Jump immediate cannot be more than 24-bits +(define_predicate "jump_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 24)) ? 1 : 0; +}) + +;; Call immediate cannot be more than 24-bits +(define_predicate "call_imm_operand" + (match_operand 0 "immediate_operand") +{ + if (GET_CODE (op) != CONST_INT) return 1; + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 24)) ? 
1 : 0; +}) + +;; Operand is register or 4-bit immediate operand +(define_predicate "reg_or_u4bits_operand" + (ior (match_operand 0 "u4bits_operand") + (match_operand 0 "register_operand"))) + +;; Operand is a register or symbol reference +(define_predicate "reg_or_sym_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "register_operand"))) + +;; Operand is a non stack pointer register +(define_predicate "nosp_reg_operand" + (and (match_operand 0 "register_operand") + (match_test "REGNO (op) != SP_REGNUM"))) + +(define_predicate "hard_reg_operand" + (and (match_operand 0 "register_operand") + (match_test "REGNO (op) <= 15"))) + +;; Operand is a memory reference and +;; not a push operand. +(define_predicate "store_operand" + (and (match_operand 0 "memory_operand") + (not (match_operand 0 "push_operand")))) + +;; Helper predicate +(define_predicate "reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "register_operand"))) + +;; +;; +;; Atithmetic/logical predicates + +;; QI Helper +(define_predicate "arith_qi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 15) && ((INTVAL (op) != 9) + || (INTVAL (op) != 11))) ? 1 : 0 ; +}) + +;;QI Reg, subreg(reg) or const_int. +(define_predicate "reg_qi_int_operand" + (ior (match_operand 0 "arith_qi_operand") + (match_operand 0 "register_operand"))) + +;; HI Helper +(define_predicate "arith_hi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), -32768, 32768) ) ? 1 : 0 ; +}) + +;;HI Reg, subreg(reg) or const_int. +(define_predicate "reg_hi_int_operand" + (ior (match_operand 0 "arith_hi_operand") + (match_operand 0 "register_operand"))) + +;;SI Reg, subreg(reg) or const_int. +(define_predicate "reg_si_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "register_operand"))) + +;; +;; Shift predicates + +;; QI Helper +(define_predicate "shift_qi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 7) ) ? 1 : 0; +}) + +;;QI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_qi_int_operand" + (ior (match_operand 0 "shift_qi_operand") + (match_operand 0 "register_operand"))) + +;; HI Helper +(define_predicate "shift_hi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 15) ) ? 1 : 0 ; +}) + +;;HI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_hi_int_operand" + (ior (match_operand 0 "shift_hi_operand") + (match_operand 0 "register_operand"))) + +;; SI Helper +(define_predicate "shift_si_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 31) ) ? 1 : 0; +}) + +;;SI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_si_int_operand" + (ior (match_operand 0 "shift_si_operand") + (match_operand 0 "register_operand"))) diff --git a/gcc-4.9/gcc/config/cr16/t-cr16 b/gcc-4.9/gcc/config/cr16/t-cr16 new file mode 100644 index 000000000..835841fc9 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/t-cr16 @@ -0,0 +1,25 @@ +# CR16 Target Makefile +# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Contributed by KPIT Cummins Infosystems Limited. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = fPIC mint32 +MULTILIB_DIRNAMES = far-pic int32 +MULTILIB_MATCHES = +MULTILIB_EXTRA_OPTS = mcr16cplus mdata-model=far + diff --git a/gcc-4.9/gcc/config/cris/constraints.md b/gcc-4.9/gcc/config/cris/constraints.md new file mode 100644 index 000000000..651fbedb0 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/constraints.md @@ -0,0 +1,164 @@ +;; Constraint definitions for CRIS. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. +(define_register_constraint "a" "ACR_REGS" + "@internal") + +(define_register_constraint "b" "GENNONACR_REGS" + "@internal") + +(define_register_constraint "h" "MOF_REGS" + "@internal") + +(define_register_constraint "x" "SPECIAL_REGS" + "@internal") + +(define_register_constraint "c" "CC0_REGS" + "@internal") + +;; Integer constraints. +(define_constraint "I" + "MOVEQ, CMPQ, ANDQ, ORQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32, 31)"))) + +(define_constraint "J" + "ADDQ, SUBQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "Kc" + "ASRQ, BTSTQ, LSRQ, LSLQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 31)"))) + +(define_constraint "Kp" + "A power of two." + (and (match_code "const_int") + (match_test "exact_log2 (ival) >= 0"))) + +(define_constraint "L" + "A 16-bit signed number." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "M" + "The constant 0 for CLEAR." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "N" + "A negative ADDQ or SUBQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -63, -1)"))) + +(define_constraint "O" + "Quickened ints, QI and HI." + (and (match_code "const_int") + (ior (match_test "IN_RANGE (ival, (65535 - 31), 65535)") + (match_test "IN_RANGE (ival, (255 - 31), 255)")))) + +(define_constraint "P" + "A 16-bit number signed *or* unsigned." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 65535)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT") + (match_test "op == CONST0_RTX (mode)"))) + +;; Memory constraints. + +;; Just an indirect register (happens to also be "all" slottable +;; memory addressing modes not covered by other constraints, i.e. '>'). +(define_memory_constraint "Q" + "@internal" + (and (match_code "mem") + (match_test "cris_base_p (XEXP (op, 0), reload_in_progress + || reload_completed)"))) + +;; Extra constraints. +(define_constraint "R" + "An operand to BDAP or BIAP." + ;; A BIAP; r.S? 
+ (ior (match_test "cris_biap_index_p (op, reload_in_progress + || reload_completed)") + ;; A [reg] or (int) [reg], maybe with post-increment. + (match_test "cris_bdap_index_p (op, reload_in_progress + || reload_completed)") + (match_test "cris_constant_index_p (op)"))) + +(define_constraint "T" + "Memory three-address operand." + ;; All are indirect-memory: + (and (match_code "mem") + ;; Double indirect: [[reg]] or [[reg+]]? + (ior (and (match_code "mem" "0") + (match_test "cris_base_or_autoincr_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)")) + ;; Just an explicit indirect reference: [const]? + (match_test "CONSTANT_P (XEXP (op, 0))") + ;; Something that is indexed; [...+...]? + (and (match_code "plus" "0") + ;; A BDAP constant: [reg+(8|16|32)bit offset]? + (ior (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)") + (match_test "cris_constant_index_p (XEXP (XEXP (op, 0), 1))")) + ;; A BDAP register: [reg+[reg(+)].S]? + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)") + (match_test "cris_bdap_index_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)")) + ;; Same, but with swapped arguments (no canonical + ;; ordering between e.g. REG and MEM as of LAST_UPDATED + ;; "Thu May 12 03:59:11 UTC 2005"). + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)") + (match_test "cris_bdap_index_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)")) + ;; A BIAP: [reg+reg.S] (MULT comes first). + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)") + (match_test "cris_biap_index_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)"))))))) + +(define_constraint "S" + "PIC-constructs for symbols." + (and (match_test "flag_pic") + (match_code "const") + (match_test "cris_valid_pic_const (op, false)"))) + +(define_constraint "U" + "@internal" + (and (match_test "flag_pic") + (match_test "CONSTANT_P (op)") + (match_operand 0 "cris_nonmemory_operand_or_callable_symbol"))) + diff --git a/gcc-4.9/gcc/config/cris/cris-protos.h b/gcc-4.9/gcc/config/cris/cris-protos.h new file mode 100644 index 000000000..0fdcafe52 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris-protos.h @@ -0,0 +1,67 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prototypes for the CRIS port. 
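A standalone C sketch of the value ranges behind some of the CRIS integer constraints defined in cris/constraints.md above (I, J, Kc, L and P; the Kp, M, N and O letters are omitted). The in_range and classify helpers are made-up names for illustration and only echo the IN_RANGE tests from the constraint definitions.

#include <stdio.h>

/* Sketch of which CRIS quick-immediate constraints a constant satisfies.  */
static int
in_range (long v, long lo, long hi)
{
  return v >= lo && v <= hi;
}

static void
classify (long ival)
{
  printf ("%6ld:%s%s%s%s%s\n", ival,
          in_range (ival, -32, 31)       ? " I (moveq/cmpq/andq/orq)"        : "",
          in_range (ival, 0, 63)         ? " J (addq/subq)"                  : "",
          in_range (ival, 0, 31)         ? " Kc (asrq/btstq/lsrq/lslq)"      : "",
          in_range (ival, -32768, 32767) ? " L (16-bit signed)"              : "",
          in_range (ival, -32768, 65535) ? " P (16-bit signed or unsigned)"  : "");
}

int
main (void)
{
  classify (17);      /* Fits I, J, Kc, L and P.  */
  classify (-40);     /* Fits L and P only.  */
  classify (40000);   /* Fits P only (unsigned 16-bit).  */
  return 0;
}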
*/ + +extern bool cris_simple_epilogue (void); +#ifdef RTX_CODE +extern const char *cris_op_str (rtx); +extern void cris_notice_update_cc (rtx, rtx); +extern bool cris_reload_address_legitimized (rtx, enum machine_mode, int, int, int); +extern int cris_side_effect_mode_ok (enum rtx_code, rtx *, int, int, + int, int, int); +extern bool cris_cc0_user_requires_cmp (rtx); +extern rtx cris_return_addr_rtx (int, rtx); +extern rtx cris_split_movdx (rtx *); +extern int cris_legitimate_pic_operand (rtx); +extern enum cris_pic_symbol_type cris_pic_symbol_type_of (const_rtx); +extern bool cris_valid_pic_const (const_rtx, bool); +extern bool cris_constant_index_p (const_rtx); +extern bool cris_base_p (const_rtx, bool); +extern bool cris_base_or_autoincr_p (const_rtx, bool); +extern bool cris_bdap_index_p (const_rtx, bool); +extern bool cris_biap_index_p (const_rtx, bool); +extern bool cris_legitimate_address_p (enum machine_mode, rtx, bool); +extern bool cris_store_multiple_op_p (rtx); +extern bool cris_movem_load_rest_p (rtx, int); +extern void cris_asm_output_symbol_ref (FILE *, rtx); +extern int cris_cfun_uses_pic_table (void); +extern void cris_asm_output_case_end (FILE *, int, rtx); +extern rtx cris_gen_movem_load (rtx, rtx, int); +extern rtx cris_emit_movem_store (rtx, rtx, int, bool); +extern void cris_expand_pic_call_address (rtx *); +extern void cris_order_for_addsi3 (rtx *, int); +extern void cris_emit_trap_for_misalignment (rtx); +#endif /* RTX_CODE */ +extern void cris_asm_output_label_ref (FILE *, char *); +extern void cris_asm_output_ident (const char *); +extern void cris_expand_prologue (void); +extern void cris_expand_epilogue (void); +extern void cris_expand_return (bool); +extern bool cris_return_address_on_stack_for_return (void); +extern bool cris_return_address_on_stack (void); +extern void cris_pragma_expand_mul (struct cpp_reader *); + +/* Need one that returns an int; usable in expressions. */ +extern int cris_fatal (char *); + +extern int cris_initial_elimination_offset (int, int); + +extern void cris_init_expanders (void); diff --git a/gcc-4.9/gcc/config/cris/cris.c b/gcc-4.9/gcc/config/cris/cris.c new file mode 100644 index 000000000..209f127a6 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.c @@ -0,0 +1,4359 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" +#include "stmt.h" +#include "expr.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "reload.h" +#include "tm_p.h" +#include "debug.h" +#include "output.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "ggc.h" +#include "optabs.h" +#include "df.h" +#include "opts.h" +#include "cgraph.h" + +/* Usable when we have an amount to add or subtract, and want the + optimal size of the insn. */ +#define ADDITIVE_SIZE_MODIFIER(size) \ + ((size) <= 63 ? "q" : (size) <= 255 ? "u.b" : (size) <= 65535 ? "u.w" : ".d") + +#define LOSE_AND_RETURN(msgid, x) \ + do \ + { \ + cris_operand_lossage (msgid, x); \ + return; \ + } while (0) + +enum cris_retinsn_type + { CRIS_RETINSN_UNKNOWN = 0, CRIS_RETINSN_RET, CRIS_RETINSN_JUMP }; + +/* Per-function machine data. */ +struct GTY(()) machine_function + { + int needs_return_address_on_stack; + + /* This is the number of registers we save in the prologue due to + stdarg. */ + int stdarg_regs; + + enum cris_retinsn_type return_type; + }; + +/* This little fix suppresses the 'u' or 's' when '%e' in assembly + pattern. */ +static char cris_output_insn_is_bound = 0; + +/* In code for output macros, this is how we know whether e.g. constant + goes in code or in a static initializer. */ +static int in_code = 0; + +/* Fix for reg_overlap_mentioned_p. */ +static int cris_reg_overlap_mentioned_p (rtx, rtx); + +static enum machine_mode cris_promote_function_mode (const_tree, enum machine_mode, + int *, const_tree, int); + +static unsigned int cris_atomic_align_for_mode (enum machine_mode); + +static void cris_print_base (rtx, FILE *); + +static void cris_print_index (rtx, FILE *); + +static void cris_output_addr_const (FILE *, rtx); + +static struct machine_function * cris_init_machine_status (void); + +static rtx cris_struct_value_rtx (tree, int); + +static void cris_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree type, int *, int); + +static int cris_initial_frame_pointer_offset (void); + +static void cris_operand_lossage (const char *, rtx); + +static int cris_reg_saved_in_regsave_area (unsigned int, bool); + +static void cris_print_operand (FILE *, rtx, int); + +static void cris_print_operand_address (FILE *, rtx); + +static bool cris_print_operand_punct_valid_p (unsigned char code); + +static bool cris_output_addr_const_extra (FILE *, rtx); + +static void cris_conditional_register_usage (void); + +static void cris_asm_output_mi_thunk + (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); + +static void cris_file_start (void); +static void cris_init_libfuncs (void); + +static reg_class_t cris_preferred_reload_class (rtx, reg_class_t); + +static int cris_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int cris_memory_move_cost (enum machine_mode, reg_class_t, bool); +static bool cris_rtx_costs (rtx, int, int, int, int *, bool); +static int cris_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static bool cris_pass_by_reference (cumulative_args_t, enum machine_mode, + const_tree, bool); +static int cris_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx 
cris_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx cris_function_incoming_arg (cumulative_args_t, + enum machine_mode, const_tree, bool); +static void cris_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static tree cris_md_asm_clobbers (tree, tree, tree); + +static void cris_option_override (void); + +static bool cris_frame_pointer_required (void); + +static void cris_asm_trampoline_template (FILE *); +static void cris_trampoline_init (rtx, tree, rtx); + +static rtx cris_function_value(const_tree, const_tree, bool); +static rtx cris_libcall_value (enum machine_mode, const_rtx); +static bool cris_function_value_regno_p (const unsigned int); +static void cris_file_end (void); + +/* This is the parsed result of the "-max-stack-stackframe=" option. If + it (still) is zero, then there was no such option given. */ +int cris_max_stackframe = 0; + +/* This is the parsed result of the "-march=" option, if given. */ +int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.dword\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* We need to define these, since the 2byte, 4byte, 8byte op:s are only + available in ELF. These "normal" pseudos do not have any alignment + constraints or side-effects. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP + +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP + +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND cris_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS cris_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P cris_print_operand_punct_valid_p +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA cris_output_addr_const_extra + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE cris_conditional_register_usage + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK cris_asm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START cris_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END cris_file_end + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS cris_init_libfuncs + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P cris_legitimate_address_p + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS cris_preferred_reload_class + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST cris_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST cris_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS cris_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST cris_address_cost + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE cris_promote_function_mode + +#undef TARGET_ATOMIC_ALIGN_FOR_MODE +#define TARGET_ATOMIC_ALIGN_FOR_MODE cris_atomic_align_for_mode + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX 
cris_struct_value_rtx +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS cris_setup_incoming_varargs +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE cris_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES cris_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG cris_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG cris_function_incoming_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance +#undef TARGET_MD_ASM_CLOBBERS +#define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED cris_frame_pointer_required + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE cris_option_override + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE cris_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT cris_trampoline_init + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE cris_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE cris_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P cris_function_value_regno_p + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Helper for cris_load_multiple_op and cris_ret_movem_op. */ + +bool +cris_movem_load_rest_p (rtx op, int offs) +{ + unsigned int reg_count = XVECLEN (op, 0) - offs; + rtx src_addr; + int i; + rtx elt; + int setno; + int regno_dir = 1; + unsigned int regno = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg). */ + if (reg_count <= 1 + || GET_CODE (XVECEXP (op, 0, offs)) != SET + || !REG_P (SET_DEST (XVECEXP (op, 0, offs))) + || !MEM_P (SET_SRC (XVECEXP (op, 0, offs)))) + return false; + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, offs + 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, offs + 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, offs + 1))) + || !CONST_INT_P (inc) + || INTVAL (inc) != (HOST_WIDE_INT) reg_count * 4) + return false; + i = offs + 2; + } + else + i = offs + 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + elt = XVECEXP (op, 0, offs); + src_addr = XEXP (SET_SRC (elt), 0); + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || !memory_address_p (SImode, src_addr)) + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr) + || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != setno * 4) + return false; + } + + return true; +} + +/* Worker function for predicate for the parallel contents in a movem + to-memory. 
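A standalone C sketch of the shape invariant that cris_movem_load_rest_p above enforces on a movem-style parallel: consecutive registers (descending, as in the pre-V32 ordering) loaded from consecutive SImode words, with any post-increment equal to 4 bytes per register. Plain arrays stand in for the SETs of the parallel; the RTL details (modes, rtx_equal_p on the base address, the post-increment SET itself) are omitted, and movem_shape_ok is a made-up name.

#include <stdio.h>

/* Sketch: REGNOS[i] and OFFSETS[i] describe the i-th load of the parallel;
   INC_AMOUNT is the post-increment, or 0 if there is none.  */
static int
movem_shape_ok (const int *regnos, const int *offsets, int n, int inc_amount)
{
  int i;

  for (i = 1; i < n; i++)
    /* Register numbers descend by one (pre-V32 order) and each source
       word is 4 bytes further on.  */
    if (regnos[i] != regnos[i - 1] - 1 || offsets[i] != offsets[i - 1] + 4)
      return 0;

  return inc_amount == 0 || inc_amount == n * 4;
}

int
main (void)
{
  int regs[] = { 3, 2, 1, 0 };
  int offs[] = { 0, 4, 8, 12 };

  printf ("%d\n", movem_shape_ok (regs, offs, 4, 16));  /* 1: valid shape.  */
  printf ("%d\n", movem_shape_ok (regs, offs, 4, 8));   /* 0: wrong increment.  */
  return 0;
}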
*/ + +bool +cris_store_multiple_op_p (rtx op) +{ + int reg_count = XVECLEN (op, 0); + rtx dest; + rtx dest_addr; + rtx dest_base; + int i; + rtx elt; + int setno; + int regno_dir = 1; + int regno = 0; + int offset = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg) and (MEM (PLUS reg const)). */ + if (reg_count <= 1) + return false; + + elt = XVECEXP (op, 0, 0); + + if (GET_CODE (elt) != SET) + return false; + + dest = SET_DEST (elt); + + if (!REG_P (SET_SRC (elt)) || !MEM_P (dest)) + return false; + + dest_addr = XEXP (dest, 0); + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, 1))) + || !CONST_INT_P (inc) + /* Support increment by number of registers, and by the offset + of the destination, if it has the form (MEM (PLUS reg + offset)). */ + || !((REG_P (dest_addr) + && REGNO (dest_addr) == REGNO (reg) + && INTVAL (inc) == (HOST_WIDE_INT) reg_count * 4) + || (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && REGNO (XEXP (dest_addr, 0)) == REGNO (reg) + && CONST_INT_P (XEXP (dest_addr, 1)) + && INTVAL (XEXP (dest_addr, 1)) == INTVAL (inc)))) + return false; + + i = 2; + } + else + i = 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode) + return false; + + if (REG_P (dest_addr)) + { + dest_base = dest_addr; + offset = 0; + } + else if (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && CONST_INT_P (XEXP (dest_addr, 1))) + { + dest_base = XEXP (dest_addr, 0); + offset = INTVAL (XEXP (dest_addr, 1)); + } + else + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_base) + || !CONST_INT_P (XEXP (XEXP (SET_DEST (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != setno * 4 + offset) + return false; + } + + return true; +} + +/* The TARGET_CONDITIONAL_REGISTER_USAGE worker. */ + +static void +cris_conditional_register_usage (void) +{ + /* FIXME: This isn't nice. We should be able to use that register for + something else if the PIC table isn't needed. */ + if (flag_pic) + fixed_regs[PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + + /* Allow use of ACR (PC in pre-V32) and tweak order. */ + if (TARGET_V32) + { + static const int reg_alloc_order_v32[] = REG_ALLOC_ORDER_V32; + unsigned int i; + + fixed_regs[CRIS_ACR_REGNUM] = 0; + + for (i = 0; + i < sizeof (reg_alloc_order_v32)/sizeof (reg_alloc_order_v32[0]); + i++) + reg_alloc_order[i] = reg_alloc_order_v32[i]; + } + + if (TARGET_HAS_MUL_INSNS) + fixed_regs[CRIS_MOF_REGNUM] = 0; + + /* On early versions, we must use the 16-bit condition-code register, + which has another name. 
*/ + if (cris_cpu_version < 8) + reg_names[CRIS_CC0_REGNUM] = "ccr"; +} + +/* Return crtl->uses_pic_offset_table. For use in cris.md, + since some generated files do not include function.h. */ + +int +cris_cfun_uses_pic_table (void) +{ + return crtl->uses_pic_offset_table; +} + +/* Given an rtx, return the text string corresponding to the CODE of X. + Intended for use in the assembly language output section of a + define_insn. */ + +const char * +cris_op_str (rtx x) +{ + cris_output_insn_is_bound = 0; + switch (GET_CODE (x)) + { + case PLUS: + return "add"; + break; + + case MINUS: + return "sub"; + break; + + case MULT: + /* This function is for retrieving a part of an instruction name for + an operator, for immediate output. If that ever happens for + MULT, we need to apply TARGET_MUL_BUG in the caller. Make sure + we notice. */ + internal_error ("MULT case in cris_op_str"); + break; + + case DIV: + return "div"; + break; + + case AND: + return "and"; + break; + + case IOR: + return "or"; + break; + + case XOR: + return "xor"; + break; + + case NOT: + return "not"; + break; + + case ASHIFT: + return "lsl"; + break; + + case LSHIFTRT: + return "lsr"; + break; + + case ASHIFTRT: + return "asr"; + break; + + case UMIN: + /* Used to control the sign/zero-extend character for the 'E' modifier. + BOUND has none. */ + cris_output_insn_is_bound = 1; + return "bound"; + break; + + default: + return "Unknown operator"; + break; + } +} + +/* Emit an error message when we're in an asm, and a fatal error for + "normal" insns. Formatted output isn't easily implemented, since we + use output_operand_lossage to output the actual message and handle the + categorization of the error. */ + +static void +cris_operand_lossage (const char *msgid, rtx op) +{ + debug_rtx (op); + output_operand_lossage ("%s", msgid); +} + +/* Print an index part of an address to file. */ + +static void +cris_print_index (rtx index, FILE *file) +{ + /* Make the index "additive" unless we'll output a negative number, in + which case the sign character is free (as in free beer). */ + if (!CONST_INT_P (index) || INTVAL (index) >= 0) + putc ('+', file); + + if (REG_P (index)) + fprintf (file, "$%s.b", reg_names[REGNO (index)]); + else if (CONSTANT_P (index)) + cris_output_addr_const (file, index); + else if (GET_CODE (index) == MULT) + { + fprintf (file, "$%s.", + reg_names[REGNO (XEXP (index, 0))]); + + putc (INTVAL (XEXP (index, 1)) == 2 ? 'w' : 'd', file); + } + else if (GET_CODE (index) == SIGN_EXTEND && MEM_P (XEXP (index, 0))) + { + rtx inner = XEXP (index, 0); + rtx inner_inner = XEXP (inner, 0); + + if (GET_CODE (inner_inner) == POST_INC) + { + fprintf (file, "[$%s+].", + reg_names[REGNO (XEXP (inner_inner, 0))]); + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + else + { + fprintf (file, "[$%s].", reg_names[REGNO (inner_inner)]); + + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + } + else if (MEM_P (index)) + { + rtx inner = XEXP (index, 0); + if (GET_CODE (inner) == POST_INC) + fprintf (file, "[$%s+].d", reg_names[REGNO (XEXP (inner, 0))]); + else + fprintf (file, "[$%s].d", reg_names[REGNO (inner)]); + } + else + cris_operand_lossage ("unexpected index-type in cris_print_index", + index); +} + +/* Print a base rtx of an address to file. 
*/ + +static void +cris_print_base (rtx base, FILE *file) +{ + if (REG_P (base)) + fprintf (file, "$%s", reg_names[REGNO (base)]); + else if (GET_CODE (base) == POST_INC) + { + gcc_assert (REGNO (XEXP (base, 0)) != CRIS_ACR_REGNUM); + fprintf (file, "$%s+", reg_names[REGNO (XEXP (base, 0))]); + } + else + cris_operand_lossage ("unexpected base-type in cris_print_base", + base); +} + +/* Usable as a guard in expressions. */ + +int +cris_fatal (char *arg) +{ + internal_error (arg); + + /* We'll never get here; this is just to appease compilers. */ + return 0; +} + +/* Return nonzero if REGNO is an ordinary register that *needs* to be + saved together with other registers, possibly by a MOVEM instruction, + or is saved for target-independent reasons. There may be + target-dependent reasons to save the register anyway; this is just a + wrapper for a complicated conditional. */ + +static int +cris_reg_saved_in_regsave_area (unsigned int regno, bool got_really_used) +{ + return + (((df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + || (regno == PIC_OFFSET_TABLE_REGNUM + && (got_really_used + /* It is saved anyway, if there would be a gap. */ + || (flag_pic + && df_regs_ever_live_p (regno + 1) + && !call_used_regs[regno + 1])))) + && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed) + && regno != CRIS_SRP_REGNUM) + || (crtl->calls_eh_return + && (regno == EH_RETURN_DATA_REGNO (0) + || regno == EH_RETURN_DATA_REGNO (1) + || regno == EH_RETURN_DATA_REGNO (2) + || regno == EH_RETURN_DATA_REGNO (3))); +} + +/* The PRINT_OPERAND worker. */ + +static void +cris_print_operand (FILE *file, rtx x, int code) +{ + rtx operand = x; + + /* Size-strings corresponding to MULT expressions. */ + static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" }; + + /* New code entries should just be added to the switch below. If + handling is finished, just return. If handling was just a + modification of the operand, the modified operand should be put in + "operand", and then do a break to let default handling + (zero-modifier) output the operand. */ + + switch (code) + { + case 'b': + /* Print the unsigned supplied integer as if it were signed + and < 0, i.e print 255 or 65535 as -1, 254, 65534 as -2, etc. */ + if (!satisfies_constraint_O (x)) + LOSE_AND_RETURN ("invalid operand for 'b' modifier", x); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (x)| (INTVAL (x) <= 255 ? ~255 : ~65535)); + return; + + case 'x': + /* Print assembler code for operator. */ + fprintf (file, "%s", cris_op_str (operand)); + return; + + case 'o': + { + /* A movem modifier working on a parallel; output the register + name. */ + int regno; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'o' modifier", x); + + /* The second item can be (set reg (plus reg const)) to denote a + postincrement. */ + regno + = (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS + ? XVECLEN (x, 0) - 2 + : XVECLEN (x, 0) - 1); + + fprintf (file, "$%s", reg_names [regno]); + } + return; + + case 'O': + { + /* A similar movem modifier; output the memory operand. */ + rtx addr; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'O' modifier", x); + + /* The lowest mem operand is in the first item, but perhaps it + needs to be output as postincremented. */ + addr = MEM_P (SET_SRC (XVECEXP (x, 0, 0))) + ? XEXP (SET_SRC (XVECEXP (x, 0, 0)), 0) + : XEXP (SET_DEST (XVECEXP (x, 0, 0)), 0); + + /* The second item can be a (set reg (plus reg const)) to denote + a modification. 
*/ + if (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS) + { + /* It's a post-increment, if the address is a naked (reg). */ + if (REG_P (addr)) + addr = gen_rtx_POST_INC (SImode, addr); + else + { + /* Otherwise, it's a side-effect; RN=RN+M. */ + fprintf (file, "[$%s=$%s%s%d]", + reg_names [REGNO (SET_DEST (XVECEXP (x, 0, 1)))], + reg_names [REGNO (XEXP (addr, 0))], + INTVAL (XEXP (addr, 1)) < 0 ? "" : "+", + (int) INTVAL (XEXP (addr, 1))); + return; + } + } + output_address (addr); + } + return; + + case 'p': + /* Adjust a power of two to its log2. */ + if (!CONST_INT_P (x) || exact_log2 (INTVAL (x)) < 0 ) + LOSE_AND_RETURN ("invalid operand for 'p' modifier", x); + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 's': + /* For an integer, print 'b' or 'w' if <= 255 or <= 65535 + respectively. This modifier also terminates the inhibiting + effects of the 'x' modifier. */ + cris_output_insn_is_bound = 0; + if (GET_MODE (x) == VOIDmode && CONST_INT_P (x)) + { + if (INTVAL (x) >= 0) + { + if (INTVAL (x) <= 255) + putc ('b', file); + else if (INTVAL (x) <= 65535) + putc ('w', file); + else + putc ('d', file); + } + else + putc ('d', file); + return; + } + + /* For a non-integer, print the size of the operand. */ + putc ((GET_MODE (x) == SImode || GET_MODE (x) == SFmode) + ? 'd' : GET_MODE (x) == HImode ? 'w' + : GET_MODE (x) == QImode ? 'b' + /* If none of the above, emit an erroneous size letter. */ + : 'X', + file); + return; + + case 'z': + /* Const_int: print b for -127 <= x <= 255, + w for -32768 <= x <= 65535, else die. */ + if (!CONST_INT_P (x) + || INTVAL (x) < -32768 || INTVAL (x) > 65535) + LOSE_AND_RETURN ("invalid operand for 'z' modifier", x); + putc (INTVAL (x) >= -128 && INTVAL (x) <= 255 ? 'b' : 'w', file); + return; + + case 'Z': + /* If this is a GOT-symbol, print the size-letter corresponding to + -fpic/-fPIC. For everything else, print "d". */ + putc ((flag_pic == 1 + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREAD) + ? 'w' : 'd', file); + return; + + case '#': + /* Output a 'nop' if there's nothing for the delay slot. + This method stolen from the sparc files. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + + case '!': + /* Output directive for alignment padded with "nop" insns. + Optimizing for size, it's plain 4-byte alignment, otherwise we + align the section to a cache-line (32 bytes) and skip at max 2 + bytes, i.e. we skip if it's the last insn on a cache-line. The + latter is faster by a small amount (for two test-programs 99.6% + and 99.9%) and larger by a small amount (ditto 100.1% and + 100.2%). This is supposed to be the simplest yet performance- + wise least intrusive way to make sure the immediately following + (supposed) muls/mulu insn isn't located at the end of a + cache-line. */ + if (TARGET_MUL_BUG) + fputs (optimize_size + ? ".p2alignw 2,0x050f\n\t" + : ".p2alignw 5,0x050f,2\n\t", file); + return; + + case ':': + /* The PIC register. */ + if (! flag_pic) + internal_error ("invalid use of ':' modifier"); + fprintf (file, "$%s", reg_names [PIC_OFFSET_TABLE_REGNUM]); + return; + + case 'H': + /* Print high (most significant) part of something. */ + switch (GET_CODE (operand)) + { + case CONST_INT: + /* If we're having 64-bit HOST_WIDE_INTs, the whole (DImode) + value is kept here, and so may be other than 0 or -1. 
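A standalone C sketch of the size-letter choice made by the integer branch of the 's' modifier in cris_print_operand above: non-negative constants that fit in a byte or a word get 'b' or 'w', anything else gets 'd'. (The 'z' modifier differs in that it also accepts small negative values and rejects anything outside the word range.) cris_size_letter_for_int is a made-up name for illustration only.

#include <stdio.h>

/* Pick the CRIS operand-size suffix for an integer constant, mirroring
   the 's' modifier's integer branch.  */
static char
cris_size_letter_for_int (long val)
{
  if (val >= 0 && val <= 255)
    return 'b';
  if (val >= 0 && val <= 65535)
    return 'w';
  return 'd';
}

int
main (void)
{
  printf ("%c %c %c\n",
          cris_size_letter_for_int (200),    /* b */
          cris_size_letter_for_int (5000),   /* w */
          cris_size_letter_for_int (-1));    /* d */
  return 0;
}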
*/ + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (operand_subword (operand, 1, 0, DImode))); + return; + + case CONST_DOUBLE: + /* High part of a long long constant. */ + if (GET_MODE (operand) == VOIDmode) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_HIGH (x)); + return; + } + else + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + + case REG: + /* Print reg + 1. Check that there's not an attempt to print + high-parts of registers like stack-pointer or higher, except + for SRP (where the "high part" is MOF). */ + if (REGNO (operand) > STACK_POINTER_REGNUM - 2 + && (REGNO (operand) != CRIS_SRP_REGNUM + || CRIS_SRP_REGNUM + 1 != CRIS_MOF_REGNUM + || fixed_regs[CRIS_MOF_REGNUM] != 0)) + LOSE_AND_RETURN ("bad register", operand); + fprintf (file, "$%s", reg_names[REGNO (operand) + 1]); + return; + + case MEM: + /* Adjust memory address to high part. */ + { + rtx adj_mem = operand; + int size + = GET_MODE_BITSIZE (GET_MODE (operand)) / BITS_PER_UNIT; + + /* Adjust so we can use two SImode in DImode. + Calling adj_offsettable_operand will make sure it is an + offsettable address. Don't do this for a postincrement + though; it should remain as it was. */ + if (GET_CODE (XEXP (adj_mem, 0)) != POST_INC) + adj_mem + = adjust_address (adj_mem, GET_MODE (adj_mem), size / 2); + + output_address (XEXP (adj_mem, 0)); + return; + } + + default: + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + } + + case 'L': + /* Strip the MEM expression. */ + operand = XEXP (operand, 0); + break; + + case 'e': + /* Like 'E', but ignore state set by 'x'. FIXME: Use code + iterators and attributes in cris.md to avoid the need for %x + and %E (and %e) and state passed between those modifiers. */ + cris_output_insn_is_bound = 0; + /* FALL THROUGH. */ + case 'E': + /* Print 's' if operand is SIGN_EXTEND or 'u' if ZERO_EXTEND unless + cris_output_insn_is_bound is nonzero. */ + if (GET_CODE (operand) != SIGN_EXTEND + && GET_CODE (operand) != ZERO_EXTEND + && !CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'e' modifier", x); + + if (cris_output_insn_is_bound) + { + cris_output_insn_is_bound = 0; + return; + } + + putc (GET_CODE (operand) == SIGN_EXTEND + || (CONST_INT_P (operand) && INTVAL (operand) < 0) + ? 's' : 'u', file); + return; + + case 'm': + /* Print the size letter of the inner element. We can do it by + calling ourselves with the 's' modifier. */ + if (GET_CODE (operand) != SIGN_EXTEND && GET_CODE (operand) != ZERO_EXTEND) + LOSE_AND_RETURN ("invalid operand for 'm' modifier", x); + cris_print_operand (file, XEXP (operand, 0), 's'); + return; + + case 'M': + /* Print the least significant part of operand. */ + if (GET_CODE (operand) == CONST_DOUBLE) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + return; + } + else if (HOST_BITS_PER_WIDE_INT > 32 && CONST_INT_P (operand)) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + INTVAL (x) & ((unsigned int) 0x7fffffff * 2 + 1)); + return; + } + /* Otherwise the least significant part equals the normal part, + so handle it normally. */ + break; + + case 'A': + /* When emitting an add for the high part of a DImode constant, we + want to use addq for 0 and adds.w for -1. */ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'A' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? 
"adds.w" : "addq"); + return; + + case 'P': + /* For const_int operands, print the additive mnemonic and the + modified operand (byte-sized operands don't save anything): + N=MIN_INT..-65536: add.d N + -65535..-64: subu.w -N + -63..-1: subq -N + 0..63: addq N + 64..65535: addu.w N + 65536..MAX_INT: add.d N. + (Emitted mnemonics are capitalized to simplify testing.) + For anything else (N.B: only register is valid), print "add.d". */ + if (REG_P (operand)) + { + fprintf (file, "Add.d "); + + /* Deal with printing the operand by dropping through to the + normal path. */ + break; + } + else + { + int val; + gcc_assert (CONST_INT_P (operand)); + + val = INTVAL (operand); + if (!IN_RANGE (val, -65535, 65535)) + fprintf (file, "Add.d %d", val); + else if (val <= -64) + fprintf (file, "Subu.w %d", -val); + else if (val <= -1) + fprintf (file, "Subq %d", -val); + else if (val <= 63) + fprintf (file, "Addq %d", val); + else if (val <= 65535) + fprintf (file, "Addu.w %d", val); + return; + } + break; + + case 'q': + /* If the operand is an integer -31..31, print "q" else ".d". */ + if (CONST_INT_P (operand) && IN_RANGE (INTVAL (operand), -31, 31)) + fprintf (file, "q"); + else + fprintf (file, ".d"); + return; + + case 'd': + /* If this is a GOT symbol, force it to be emitted as :GOT and + :GOTPLT regardless of -fpic (i.e. not as :GOT16, :GOTPLT16). + Avoid making this too much of a special case. */ + if (flag_pic == 1 && CONSTANT_P (operand)) + { + int flag_pic_save = flag_pic; + + flag_pic = 2; + cris_output_addr_const (file, operand); + flag_pic = flag_pic_save; + return; + } + break; + + case 'D': + /* When emitting an sub for the high part of a DImode constant, we + want to use subq for 0 and subs.w for -1. */ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'D' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? "subs.w" : "subq"); + return; + + case 'S': + /* Print the operand as the index-part of an address. + Easiest way out is to use cris_print_index. */ + cris_print_index (operand, file); + return; + + case 'T': + /* Print the size letter for an operand to a MULT, which must be a + const_int with a suitable value. */ + if (!CONST_INT_P (operand) || INTVAL (operand) > 4) + LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); + fprintf (file, "%s", mults[INTVAL (operand)]); + return; + + case 'u': + /* Print "u.w" if a GOT symbol and flag_pic == 1, else ".d". */ + if (flag_pic == 1 + && GET_CODE (operand) == CONST + && GET_CODE (XEXP (operand, 0)) == UNSPEC + && XINT (XEXP (operand, 0), 1) == CRIS_UNSPEC_GOTREAD) + fprintf (file, "u.w"); + else + fprintf (file, ".d"); + return; + + case 0: + /* No code, print as usual. */ + break; + + default: + LOSE_AND_RETURN ("invalid operand modifier letter", x); + } + + /* Print an operand as without a modifier letter. */ + switch (GET_CODE (operand)) + { + case REG: + if (REGNO (operand) > 15 + && REGNO (operand) != CRIS_MOF_REGNUM + && REGNO (operand) != CRIS_SRP_REGNUM + && REGNO (operand) != CRIS_CC0_REGNUM) + internal_error ("internal error: bad register: %d", REGNO (operand)); + fprintf (file, "$%s", reg_names[REGNO (operand)]); + return; + + case MEM: + output_address (XEXP (operand, 0)); + return; + + case CONST_DOUBLE: + if (GET_MODE (operand) == VOIDmode) + /* A long long constant. */ + output_addr_const (file, operand); + else + { + /* Only single precision is allowed as plain operands the + moment. FIXME: REAL_VALUE_FROM_CONST_DOUBLE isn't + documented. 
*/ + REAL_VALUE_TYPE r; + long l; + + /* FIXME: Perhaps check overflow of the "single". */ + REAL_VALUE_FROM_CONST_DOUBLE (r, operand); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "0x%lx", l); + } + return; + + case UNSPEC: + /* Fall through. */ + case CONST: + cris_output_addr_const (file, operand); + return; + + case MULT: + case ASHIFT: + { + /* For a (MULT (reg X) const_int) we output "rX.S". */ + int i = CONST_INT_P (XEXP (operand, 1)) + ? INTVAL (XEXP (operand, 1)) : INTVAL (XEXP (operand, 0)); + rtx reg = CONST_INT_P (XEXP (operand, 1)) + ? XEXP (operand, 0) : XEXP (operand, 1); + + if (!REG_P (reg) + || (!CONST_INT_P (XEXP (operand, 0)) + && !CONST_INT_P (XEXP (operand, 1)))) + LOSE_AND_RETURN ("unexpected multiplicative operand", x); + + cris_print_base (reg, file); + fprintf (file, ".%c", + i == 0 || (i == 1 && GET_CODE (operand) == MULT) ? 'b' + : i == 4 ? 'd' + : (i == 2 && GET_CODE (operand) == MULT) || i == 1 ? 'w' + : 'd'); + return; + } + + default: + /* No need to handle all strange variants, let output_addr_const + do it for us. */ + if (CONSTANT_P (operand)) + { + cris_output_addr_const (file, operand); + return; + } + + LOSE_AND_RETURN ("unexpected operand", x); + } +} + +static bool +cris_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '#' || code == '!' || code == ':'); +} + +/* The PRINT_OPERAND_ADDRESS worker. */ + +static void +cris_print_operand_address (FILE *file, rtx x) +{ + /* All these were inside MEM:s so output indirection characters. */ + putc ('[', file); + + if (CONSTANT_ADDRESS_P (x)) + cris_output_addr_const (file, x); + else if (cris_base_or_autoincr_p (x, true)) + cris_print_base (x, file); + else if (GET_CODE (x) == PLUS) + { + rtx x1, x2; + + x1 = XEXP (x, 0); + x2 = XEXP (x, 1); + if (cris_base_p (x1, true)) + { + cris_print_base (x1, file); + cris_print_index (x2, file); + } + else if (cris_base_p (x2, true)) + { + cris_print_base (x2, file); + cris_print_index (x1, file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + } + else if (MEM_P (x)) + { + /* A DIP. Output more indirection characters. */ + putc ('[', file); + cris_print_base (XEXP (x, 0), file); + putc (']', file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + + putc (']', file); +} + +/* The RETURN_ADDR_RTX worker. + We mark that the return address is used, either by EH or + __builtin_return_address, for use by the function prologue and + epilogue. FIXME: This isn't optimal; we just use the mark in the + prologue and epilogue to say that the return address is to be stored + in the stack frame. We could return SRP for leaf-functions and use the + initial-value machinery. */ + +rtx +cris_return_addr_rtx (int count, rtx frameaddr ATTRIBUTE_UNUSED) +{ + cfun->machine->needs_return_address_on_stack = 1; + + /* The return-address is stored just above the saved frame-pointer (if + present). Apparently we can't eliminate from the frame-pointer in + that direction, so use the incoming args (maybe pretended) pointer. */ + return count == 0 + ? gen_rtx_MEM (Pmode, plus_constant (Pmode, virtual_incoming_args_rtx, -4)) + : NULL_RTX; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. */ + +bool +cris_return_address_on_stack (void) +{ + return df_regs_ever_live_p (CRIS_SRP_REGNUM) + || cfun->machine->needs_return_address_on_stack; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. 
*/ + +bool +cris_return_address_on_stack_for_return (void) +{ + return cfun->machine->return_type == CRIS_RETINSN_RET ? false + : cris_return_address_on_stack (); +} + +/* This used to be the INITIAL_FRAME_POINTER_OFFSET worker; now only + handles FP -> SP elimination offset. */ + +static int +cris_initial_frame_pointer_offset (void) +{ + int regno; + + /* Initial offset is 0 if we don't have a frame pointer. */ + int offs = 0; + bool got_really_used = false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), + NULL_RTX); + pop_topmost_sequence (); + } + + /* And 4 for each register pushed. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + offs += 4; + + /* And then, last, we add the locals allocated. */ + offs += get_frame_size (); + + /* And more; the accumulated args size. */ + offs += crtl->outgoing_args_size; + + /* Then round it off, in case we use aligned stack. */ + if (TARGET_STACK_ALIGN) + offs = TARGET_ALIGN_BY_32 ? (offs + 3) & ~3 : (offs + 1) & ~1; + + return offs; +} + +/* The INITIAL_ELIMINATION_OFFSET worker. + Calculate the difference between imaginary registers such as frame + pointer and the stack pointer. Used to eliminate the frame pointer + and imaginary arg pointer. */ + +int +cris_initial_elimination_offset (int fromreg, int toreg) +{ + int fp_sp_offset + = cris_initial_frame_pointer_offset (); + + /* We should be able to use regs_ever_live and related prologue + information here, or alpha should not as well. */ + bool return_address_on_stack = cris_return_address_on_stack (); + + /* Here we act as if the frame-pointer were needed. */ + int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0); + + if (fromreg == ARG_POINTER_REGNUM + && toreg == FRAME_POINTER_REGNUM) + return ap_fp_offset; + + /* Between the frame pointer and the stack are only "normal" stack + variables and saved registers. */ + if (fromreg == FRAME_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return fp_sp_offset; + + /* We need to balance out the frame pointer here. */ + if (fromreg == ARG_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return ap_fp_offset + fp_sp_offset - 4; + + gcc_unreachable (); +} + +/* Nonzero if X is a hard reg that can be used as an index. */ +static inline bool +reg_ok_for_base_p (const_rtx x, bool strict) +{ + return ((! strict && ! HARD_REGISTER_P (x)) + || REGNO_OK_FOR_BASE_P (REGNO (x))); +} + +/* Nonzero if X is a hard reg that can be used as an index. */ +static inline bool +reg_ok_for_index_p (const_rtx x, bool strict) +{ + return reg_ok_for_base_p (x, strict); +} + +/* No symbol can be used as an index (or more correct, as a base) together + with a register with PIC; the PIC register must be there. */ + +bool +cris_constant_index_p (const_rtx x) +{ + return (CONSTANT_P (x) && (!flag_pic || cris_valid_pic_const (x, true))); +} + +/* True if X is a valid base register. */ + +bool +cris_base_p (const_rtx x, bool strict) +{ + return (REG_P (x) && reg_ok_for_base_p (x, strict)); +} + +/* True if X is a valid index register. */ + +static inline bool +cris_index_p (const_rtx x, bool strict) +{ + return (REG_P (x) && reg_ok_for_index_p (x, strict)); +} + +/* True if X is a valid base register with or without autoincrement. 
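A standalone C sketch of the arithmetic in cris_initial_elimination_offset above: AP->FP is 4 for the saved frame pointer plus 4 more when the return address lives on the stack, FP->SP is the saved-register/locals/outgoing-args total, and AP->SP follows from the other two minus the 4 bytes of the saved frame pointer itself. The example frame sizes (three saved registers, 20 bytes of locals, 8 bytes of outgoing args) and the ap_to_sp helper are made up; stack-alignment rounding is left out.

#include <stdio.h>

/* Combine the FP->SP offset with the AP->FP offset to get AP->SP.  */
static int
ap_to_sp (int fp_sp_offset, int return_address_on_stack)
{
  int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0);
  return ap_fp_offset + fp_sp_offset - 4;
}

int
main (void)
{
  /* Hypothetical frame: 3 saved registers (12 bytes), 20 bytes of locals,
     8 bytes of outgoing args, so FP->SP = 40.  */
  printf ("AP->SP = %d (no return-address slot)\n", ap_to_sp (40, 0));  /* 40 */
  printf ("AP->SP = %d (SRP on stack)\n", ap_to_sp (40, 1));            /* 44 */
  return 0;
}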
*/ + +bool +cris_base_or_autoincr_p (const_rtx x, bool strict) +{ + return (cris_base_p (x, strict) + || (GET_CODE (x) == POST_INC + && cris_base_p (XEXP (x, 0), strict) + && REGNO (XEXP (x, 0)) != CRIS_ACR_REGNUM)); +} + +/* True if X is a valid (register) index for BDAP, i.e. [Rs].S or [Rs+].S. */ + +bool +cris_bdap_index_p (const_rtx x, bool strict) +{ + return ((MEM_P (x) + && GET_MODE (x) == SImode + && cris_base_or_autoincr_p (XEXP (x, 0), strict)) + || (GET_CODE (x) == SIGN_EXTEND + && MEM_P (XEXP (x, 0)) + && (GET_MODE (XEXP (x, 0)) == HImode + || GET_MODE (XEXP (x, 0)) == QImode) + && cris_base_or_autoincr_p (XEXP (XEXP (x, 0), 0), strict))); +} + +/* True if X is a valid (register) index for BIAP, i.e. Rd.m. */ + +bool +cris_biap_index_p (const_rtx x, bool strict) +{ + return (cris_index_p (x, strict) + || (GET_CODE (x) == MULT + && cris_index_p (XEXP (x, 0), strict) + && cris_scale_int_operand (XEXP (x, 1), VOIDmode))); +} + +/* Worker function for TARGET_LEGITIMATE_ADDRESS_P. + + A PIC operand looks like a normal symbol here. At output we dress it + in "[rPIC+symbol:GOT]" (global symbol) or "rPIC+symbol:GOTOFF" (local + symbol) so we exclude all addressing modes where we can't replace a + plain "symbol" with that. A global PIC symbol does not fit anywhere + here (but is thankfully a general_operand in itself). A local PIC + symbol is valid for the plain "symbol + offset" case. */ + +bool +cris_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + const_rtx x1, x2; + + if (cris_base_or_autoincr_p (x, strict)) + return true; + else if (TARGET_V32) + /* Nothing else is valid then. */ + return false; + else if (cris_constant_index_p (x)) + return true; + /* Indexed? */ + else if (GET_CODE (x) == PLUS) + { + x1 = XEXP (x, 0); + x2 = XEXP (x, 1); + /* BDAP o, Rd. */ + if ((cris_base_p (x1, strict) && cris_constant_index_p (x2)) + || (cris_base_p (x2, strict) && cris_constant_index_p (x1)) + /* BDAP Rs[+], Rd. */ + || (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && ((cris_base_p (x1, strict) + && cris_bdap_index_p (x2, strict)) + || (cris_base_p (x2, strict) + && cris_bdap_index_p (x1, strict)) + /* BIAP.m Rs, Rd */ + || (cris_base_p (x1, strict) + && cris_biap_index_p (x2, strict)) + || (cris_base_p (x2, strict) + && cris_biap_index_p (x1, strict))))) + return true; + } + else if (MEM_P (x)) + { + /* DIP (Rs). Reject [[reg+]] and [[reg]] for DImode (long long). */ + if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && cris_base_or_autoincr_p (XEXP (x, 0), strict)) + return true; + } + + return false; +} + +/* Worker function for LEGITIMIZE_RELOAD_ADDRESS. 
*/ + +bool +cris_reload_address_legitimized (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum ATTRIBUTE_UNUSED, + int itype, + int ind_levels ATTRIBUTE_UNUSED) +{ + enum reload_type type = (enum reload_type) itype; + rtx op0, op1; + rtx *op1p; + + if (GET_CODE (x) != PLUS) + return false; + + if (TARGET_V32) + return false; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op1p = &XEXP (x, 1); + + if (!REG_P (op1)) + return false; + + if (GET_CODE (op0) == SIGN_EXTEND && MEM_P (XEXP (op0, 0))) + { + rtx op00 = XEXP (op0, 0); + rtx op000 = XEXP (op00, 0); + rtx *op000p = &XEXP (op00, 0); + + if ((GET_MODE (op00) == HImode || GET_MODE (op00) == QImode) + && (REG_P (op000) + || (GET_CODE (op000) == POST_INC && REG_P (XEXP (op000, 0))))) + { + bool something_reloaded = false; + + if (GET_CODE (op000) == POST_INC + && REG_P (XEXP (op000, 0)) + && REGNO (XEXP (op000, 0)) > CRIS_LAST_GENERAL_REGISTER) + /* No, this gets too complicated and is too rare to care + about trying to improve on the general code Here. + As the return-value is an all-or-nothing indicator, we + punt on the other register too. */ + return false; + + if ((REG_P (op000) + && REGNO (op000) > CRIS_LAST_GENERAL_REGISTER)) + { + /* The address of the inner mem is a pseudo or wrong + reg: reload that. */ + push_reload (op000, NULL_RTX, op000p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, opnum, type); + something_reloaded = true; + } + + if (REGNO (op1) > CRIS_LAST_GENERAL_REGISTER) + { + /* Base register is a pseudo or wrong reg: reload it. */ + push_reload (op1, NULL_RTX, op1p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, + opnum, type); + something_reloaded = true; + } + + gcc_assert (something_reloaded); + + return true; + } + } + + return false; +} + + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. + + It seems like gcc (2.7.2 and 2.9x of 2000-03-22) may send "NO_REGS" as + the class for a constant (testcase: __Mul in arit.c). To avoid forcing + out a constant into the constant pool, we will trap this case and + return something a bit more sane. FIXME: Check if this is a bug. + Beware that we must not "override" classes that can be specified as + constraint letters, or else asm operands using them will fail when + they need to be reloaded. FIXME: Investigate whether that constitutes + a bug. */ + +static reg_class_t +cris_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) +{ + if (rclass != ACR_REGS + && rclass != MOF_REGS + && rclass != MOF_SRP_REGS + && rclass != SRP_REGS + && rclass != CC0_REGS + && rclass != SPECIAL_REGS) + return GENERAL_REGS; + + return rclass; +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. */ + +static int +cris_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + /* Can't move to and from a SPECIAL_REGS register, so we have to say + their move cost within that class is higher. How about 7? That's 3 + for a move to a GENERAL_REGS register, 3 for the move from the + GENERAL_REGS register, and 1 for the increased register pressure. + Also, it's higher than the memory move cost, as it should. + We also do this for ALL_REGS, since we don't want that class to be + preferred (even to memory) at all where GENERAL_REGS doesn't fit. + Whenever it's about to be used, it's for SPECIAL_REGS. If we don't + present a higher cost for ALL_REGS than memory, a SPECIAL_REGS may be + used when a GENERAL_REGS should be used, even if there are call-saved + GENERAL_REGS left to allocate. 
This is because the fall-back when + the most preferred register class isn't available, isn't the next + (or next good) wider register class, but the *most widest* register + class. FIXME: pre-IRA comment, perhaps obsolete now. */ + + if ((reg_classes_intersect_p (from, SPECIAL_REGS) + && reg_classes_intersect_p (to, SPECIAL_REGS)) + || from == ALL_REGS || to == ALL_REGS) + return 7; + + /* Make moves to/from SPECIAL_REGS slightly more expensive, as we + generally prefer GENERAL_REGS. */ + if (reg_classes_intersect_p (from, SPECIAL_REGS) + || reg_classes_intersect_p (to, SPECIAL_REGS)) + return 3; + + return 2; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. + + This isn't strictly correct for v0..3 in buswidth-8bit mode, but should + suffice. */ + +static int +cris_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + if (mode == QImode + || mode == HImode) + return 4; + else + return 6; +} + +/* Worker for cris_notice_update_cc; handles the "normal" cases. + FIXME: this code is historical; its functionality should be + refactored to look at insn attributes and moved to + cris_notice_update_cc. Except, we better lose cc0 entirely. */ + +static void +cris_normal_notice_update_cc (rtx exp, rtx insn) +{ + /* "Normal" means, for: + (set (cc0) (...)): + CC is (...). + + (set (reg) (...)): + CC is (reg) and (...) - unless (...) is 0 or reg is a special + register or (v32 and (...) is -32..-1), then CC does not change. + CC_NO_OVERFLOW unless (...) is reg or mem. + + (set (mem) (...)): + CC does not change. + + (set (pc) (...)): + CC does not change. + + (parallel + (set (reg1) (mem (bdap/biap))) + (set (reg2) (bdap/biap))): + CC is (reg1) and (mem (reg2)) + + (parallel + (set (mem (bdap/biap)) (reg1)) [or 0] + (set (reg2) (bdap/biap))): + CC does not change. + + (where reg and mem includes strict_low_parts variants thereof) + + For all others, assume CC is clobbered. + Note that we do not have to care about setting CC_NO_OVERFLOW, + since the overflow flag is set to 0 (i.e. right) for + instructions where it does not have any sane sense, but where + other flags have meanings. (This includes shifts; the carry is + not set by them). + + Note that there are other parallel constructs we could match, + but we don't do that yet. */ + + if (GET_CODE (exp) == SET) + { + /* FIXME: Check when this happens. It looks like we should + actually do a CC_STATUS_INIT here to be safe. */ + if (SET_DEST (exp) == pc_rtx) + return; + + /* Record CC0 changes, so we do not have to output multiple + test insns. */ + if (SET_DEST (exp) == cc0_rtx) + { + CC_STATUS_INIT; + + if (GET_CODE (SET_SRC (exp)) == COMPARE + && XEXP (SET_SRC (exp), 1) == const0_rtx) + cc_status.value1 = XEXP (SET_SRC (exp), 0); + else + cc_status.value1 = SET_SRC (exp); + + /* Handle flags for the special btstq on one bit. */ + if (GET_CODE (cc_status.value1) == ZERO_EXTRACT + && XEXP (cc_status.value1, 1) == const1_rtx) + { + if (CONST_INT_P (XEXP (cc_status.value1, 0))) + /* Using cmpq. */ + cc_status.flags = CC_INVERTED; + else + /* A one-bit btstq. */ + cc_status.flags = CC_Z_IN_NOT_N; + } + + else if (GET_CODE (SET_SRC (exp)) == COMPARE) + { + if (!REG_P (XEXP (SET_SRC (exp), 0)) + && XEXP (SET_SRC (exp), 1) != const0_rtx) + /* For some reason gcc will not canonicalize compare + operations, reversing the sign by itself if + operands are in wrong order. */ + /* (But NOT inverted; eq is still eq.) */ + cc_status.flags = CC_REVERSED; + + /* This seems to be overlooked by gcc. 
FIXME: Check again. + FIXME: Is it really safe? */ + cc_status.value2 + = gen_rtx_MINUS (GET_MODE (SET_SRC (exp)), + XEXP (SET_SRC (exp), 0), + XEXP (SET_SRC (exp), 1)); + } + return; + } + else if (REG_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && REG_P (XEXP (SET_DEST (exp), 0)))) + { + /* A register is set; normally CC is set to show that no + test insn is needed. Catch the exceptions. */ + + /* If not to cc0, then no "set"s in non-natural mode give + ok cc0... */ + if (GET_MODE_SIZE (GET_MODE (SET_DEST (exp))) > UNITS_PER_WORD + || GET_MODE_CLASS (GET_MODE (SET_DEST (exp))) == MODE_FLOAT) + { + /* ... except add:s and sub:s in DImode. */ + if (GET_MODE (SET_DEST (exp)) == DImode + && (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS)) + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Add and sub may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (SET_SRC (exp) == const0_rtx + || (REG_P (SET_SRC (exp)) + && (REGNO (SET_SRC (exp)) + > CRIS_LAST_GENERAL_REGISTER)) + || (TARGET_V32 + && REG_P (SET_DEST (exp)) + && satisfies_constraint_I (SET_SRC (exp)))) + { + /* There's no CC0 change for this case. Just check + for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + else + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Some operations may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + if (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS + || GET_CODE (SET_SRC (exp)) == NEG) + cc_status.flags |= CC_NO_OVERFLOW; + + /* For V32, nothing with a register destination sets + C and V usefully. */ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (MEM_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && MEM_P (XEXP (SET_DEST (exp), 0)))) + { + /* When SET to MEM, then CC is not changed (except for + overlap). */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + else if (GET_CODE (exp) == PARALLEL) + { + if (GET_CODE (XVECEXP (exp, 0, 0)) == SET + && GET_CODE (XVECEXP (exp, 0, 1)) == SET + && REG_P (XEXP (XVECEXP (exp, 0, 1), 0))) + { + if (REG_P (XEXP (XVECEXP (exp, 0, 0), 0)) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 1))) + { + CC_STATUS_INIT; + + /* For "move.S [rx=ry+o],rz", say CC reflects + value1=rz and value2=[rx] */ + cc_status.value1 = XEXP (XVECEXP (exp, 0, 0), 0); + cc_status.value2 + = replace_equiv_address (XEXP (XVECEXP (exp, 0, 0), 1), + XEXP (XVECEXP (exp, 0, 1), 0)); + + /* Huh? A side-effect cannot change the destination + register. */ + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + internal_error ("internal error: sideeffect-insn affecting main effect"); + + /* For V32, moves to registers don't set C and V. 
*/ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + } + else if ((REG_P (XEXP (XVECEXP (exp, 0, 0), 1)) + || XEXP (XVECEXP (exp, 0, 0), 1) == const0_rtx) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 0))) + { + /* For "move.S rz,[rx=ry+o]" and "clear.S [rx=ry+o]", + say flags are not changed, except for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + } + + /* If we got here, the case wasn't covered by the code above. */ + CC_STATUS_INIT; +} + +/* This function looks into the pattern to see how this insn affects + condition codes. + + Used when to eliminate test insns before a condition-code user, + such as a "scc" insn or a conditional branch. This includes + checking if the entities that cc was updated by, are changed by the + operation. + + Currently a jumble of the old peek-inside-the-insn and the newer + check-cc-attribute methods. */ + +void +cris_notice_update_cc (rtx exp, rtx insn) +{ + enum attr_cc attrval = get_attr_cc (insn); + + /* Check if user specified "-mcc-init" as a bug-workaround. Remember + to still set CC_REVERSED as below, since that's required by some + compare insn alternatives. (FIXME: GCC should do this virtual + operand swap by itself.) A test-case that may otherwise fail is + gcc.c-torture/execute/20000217-1.c -O0 and -O1. */ + if (TARGET_CCINIT) + { + CC_STATUS_INIT; + + if (attrval == CC_REV) + cc_status.flags = CC_REVERSED; + return; + } + + /* Slowly, we're converting to using attributes to control the setting + of condition-code status. */ + switch (attrval) + { + case CC_NONE: + /* Even if it is "none", a setting may clobber a previous + cc-value, so check. */ + if (GET_CODE (exp) == SET) + { + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + } + return; + + case CC_CLOBBER: + CC_STATUS_INIT; + return; + + case CC_REV: + case CC_NOOV32: + case CC_NORMAL: + cris_normal_notice_update_cc (exp, insn); + + /* The "test" insn doesn't clear (carry and) overflow on V32. We + can change bge => bpl and blt => bmi by passing on to the cc0 + user that V should not be considered; bgt and ble are taken + care of by other methods (see {tst,cmp}{si,hi,qi}). */ + if (attrval == CC_NOOV32 && TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + + default: + internal_error ("unknown cc_attr value"); + } + + CC_STATUS_INIT; +} + +/* Return != 0 if the return sequence for the current function is short, + like "ret" or "jump [sp+]". Prior to reloading, we can't tell if + registers must be saved, so return 0 then. */ + +bool +cris_simple_epilogue (void) +{ + unsigned int regno; + unsigned int reglimit = STACK_POINTER_REGNUM; + bool got_really_used = false; + + if (! reload_completed + || frame_pointer_needed + || get_frame_size () != 0 + || crtl->args.pretend_args_size + || crtl->args.size + || crtl->outgoing_args_size + || crtl->calls_eh_return + + /* If we're not supposed to emit prologue and epilogue, we must + not emit return-type instructions. */ + || !TARGET_PROLOGUE_EPILOGUE) + return false; + + /* Can't return from stacked return address with v32. 
*/ + if (TARGET_V32 && cris_return_address_on_stack ()) + return false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* No simple epilogue if there are saved registers. */ + for (regno = 0; regno < reglimit; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + return false; + + return true; +} + +/* Emit checking that MEM is aligned for an access in MODE, failing + that, executing a "break 8" (or call to abort, if "break 8" is + disabled). */ + +void +cris_emit_trap_for_misalignment (rtx mem) +{ + rtx addr, reg, ok_label, andop, jmp; + int natural_alignment; + gcc_assert (MEM_P (mem)); + + natural_alignment = GET_MODE_SIZE (GET_MODE (mem)); + addr = XEXP (mem, 0); + reg = force_reg (Pmode, addr); + ok_label = gen_label_rtx (); + + /* This will yield a btstq without a separate register used, usually - + with the exception for PRE hoisting the "and" but not the branch + around the trap: see testsuite/gcc.target/cris/sync-3s.c. */ + andop = gen_rtx_AND (Pmode, reg, GEN_INT (natural_alignment - 1)); + emit_cmp_and_jump_insns (force_reg (SImode, andop), const0_rtx, EQ, + NULL_RTX, Pmode, 1, ok_label); + jmp = get_last_insn (); + gcc_assert (JUMP_P (jmp)); + + predict_insn_def (jmp, PRED_NORETURN, TAKEN); + expand_builtin_trap (); + emit_label (ok_label); +} + +/* Expand a return insn (just one insn) marked as using SRP or stack + slot depending on parameter ON_STACK. */ + +void +cris_expand_return (bool on_stack) +{ + /* FIXME: emit a parallel with a USE for SRP or the stack-slot, to + tell "ret" from "jump [sp+]". Some, but not all, other parts of + GCC expect just (return) to do the right thing when optimizing, so + we do that until they're fixed. Currently, all return insns in a + function must be the same (not really a limiting factor) so we need + to check that it doesn't change half-way through. */ + emit_jump_insn (ret_rtx); + + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack); + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack); + + cfun->machine->return_type + = on_stack ? CRIS_RETINSN_JUMP : CRIS_RETINSN_RET; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +cris_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + switch (code) + { + case CONST_INT: + { + HOST_WIDE_INT val = INTVAL (x); + if (val == 0) + *total = 0; + else if (val < 32 && val >= -32) + *total = 1; + /* Eight or 16 bits are a word and cycle more expensive. */ + else if (val <= 32767 && val >= -32768) + *total = 2; + /* A 32-bit constant (or very seldom, unsigned 16 bits) costs + another word. FIXME: This isn't linear to 16 bits. */ + else + *total = 4; + return true; + } + + case LABEL_REF: + *total = 6; + return true; + + case CONST: + case SYMBOL_REF: + *total = 6; + return true; + + case CONST_DOUBLE: + if (x != CONST0_RTX (GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x))) + *total = 12; + else + /* Make 0.0 cheap, else test-insns will not be used. */ + *total = 0; + return true; + + case MULT: + /* If we have one arm of an ADDI, make sure it gets the cost of + one insn, i.e. 
zero cost for this operand, and just the cost + of the PLUS, as the insn is created by combine from a PLUS + and an ASHIFT, and the MULT cost below would make the + combined value be larger than the separate insns. The insn + validity is checked elsewhere by combine. + + FIXME: this case is a stop-gap for 4.3 and 4.4, this whole + function should be rewritten. */ + if (outer_code == PLUS && cris_biap_index_p (x, false)) + { + *total = 0; + return true; + } + + /* Identify values that are not powers of two. Powers of 2 are + taken care of already and those values should not be changed. */ + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1))) < 0) + { + /* If we have a multiply insn, then the cost is between + 1 and 2 "fast" instructions. */ + if (TARGET_HAS_MUL_INSNS) + { + *total = COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2; + return true; + } + + /* Estimate as 4 + 4 * #of bits. */ + *total = COSTS_N_INSNS (132); + return true; + } + return false; + + case UDIV: + case MOD: + case UMOD: + case DIV: + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1))) < 0) + { + /* Estimate this as 4 + 8 * #of bits. */ + *total = COSTS_N_INSNS (260); + return true; + } + return false; + + case AND: + if (CONST_INT_P (XEXP (x, 1)) + /* Two constants may actually happen before optimization. */ + && !CONST_INT_P (XEXP (x, 0)) + && !satisfies_constraint_I (XEXP (x, 1))) + { + *total + = (rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, + opno, speed) + 2 + + 2 * GET_MODE_NUNITS (GET_MODE (XEXP (x, 0)))); + return true; + } + return false; + + case ZERO_EXTRACT: + if (outer_code != COMPARE) + return false; + /* fall through */ + + case ZERO_EXTEND: case SIGN_EXTEND: + *total = rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, opno, speed); + return true; + + default: + return false; + } +} + +/* The ADDRESS_COST worker. */ + +static int +cris_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + /* The metric to use for the cost-macros is unclear. + The metric used here is (the number of cycles needed) / 2, + where we consider equal a cycle for a word of code and a cycle to + read memory. FIXME: Adding "+ 1" to all values would avoid + returning 0, as tree-ssa-loop-ivopts.c as of r128272 "normalizes" + 0 to 1, thereby giving equal costs to [rN + rM] and [rN]. + Unfortunately(?) such a hack would expose other pessimizations, + at least with g++.dg/tree-ssa/ivopts-1.C, adding insns to the + loop there, without apparent reason. */ + + /* The cheapest addressing modes get 0, since nothing extra is needed. */ + if (cris_base_or_autoincr_p (x, false)) + return 0; + + /* An indirect mem must be a DIP. This means two bytes extra for code, + and 4 bytes extra for memory read, i.e. (2 + 4) / 2. */ + if (MEM_P (x)) + return (2 + 4) / 2; + + /* Assume (2 + 4) / 2 for a single constant; a dword, since it needs + an extra DIP prefix and 4 bytes of constant in most cases. */ + if (CONSTANT_P (x)) + return (2 + 4) / 2; + + /* Handle BIAP and BDAP prefixes. */ + if (GET_CODE (x) == PLUS) + { + rtx tem1 = XEXP (x, 0); + rtx tem2 = XEXP (x, 1); + + /* Local extended canonicalization rule: the first operand must + be REG, unless it's an operation (MULT). */ + if (!REG_P (tem1) && GET_CODE (tem1) != MULT) + tem1 = tem2, tem2 = XEXP (x, 0); + + /* We'll "assume" we have canonical RTX now. */ + gcc_assert (REG_P (tem1) || GET_CODE (tem1) == MULT); + + /* A BIAP is 2 extra bytes for the prefix insn, nothing more.
We + recognize the typical MULT which is always in tem1 because of + insn canonicalization. */ + if ((GET_CODE (tem1) == MULT && cris_biap_index_p (tem1, false)) + || REG_P (tem2)) + return 2 / 2; + + /* A BDAP (quick) is 2 extra bytes. Any constant operand to the + PLUS is always found in tem2. */ + if (CONST_INT_P (tem2) && INTVAL (tem2) < 128 && INTVAL (tem2) >= -128) + return 2 / 2; + + /* A BDAP -32768 .. 32767 is like BDAP quick, but with 2 extra + bytes. */ + if (satisfies_constraint_L (tem2)) + return (2 + 2) / 2; + + /* A BDAP with some other constant is 2 bytes extra. */ + if (CONSTANT_P (tem2)) + return (2 + 2 + 2) / 2; + + /* BDAP with something indirect should have a higher cost than + BIAP with register. FIXME: Should it cost like a MEM or more? */ + return (2 + 2 + 2) / 2; + } + + /* What else? Return a high cost. It matters only for valid + addressing modes. */ + return 10; +} + +/* Check various objections to the side-effect. Used in the test-part + of an anonymous insn describing an insn with a possible side-effect. + Returns nonzero if the implied side-effect is ok. + + code : PLUS or MULT + ops : An array of rtx:es. lreg, rreg, rval, + The variables multop and other_op are indexes into this, + or -1 if they are not applicable. + lreg : The register that gets assigned in the side-effect. + rreg : One register in the side-effect expression + rval : The other register, or an int. + multop : An integer to multiply rval with. + other_op : One of the entities of the main effect, + whose mode we must consider. */ + +int +cris_side_effect_mode_ok (enum rtx_code code, rtx *ops, + int lreg, int rreg, int rval, + int multop, int other_op) +{ + /* Find what value to multiply with, for rx =ry + rz * n. */ + int mult = multop < 0 ? 1 : INTVAL (ops[multop]); + + rtx reg_rtx = ops[rreg]; + rtx val_rtx = ops[rval]; + + /* The operands may be swapped. Canonicalize them in reg_rtx and + val_rtx, where reg_rtx always is a reg (for this constraint to + match). */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + reg_rtx = val_rtx, val_rtx = ops[rreg]; + + /* Don't forget to check that reg_rtx really is a reg. If it isn't, + we have no business. */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + return 0; + + /* Don't do this when -mno-split. */ + if (!TARGET_SIDE_EFFECT_PREFIXES) + return 0; + + /* The mult expression may be hidden in lreg. FIXME: Add more + commentary about that. */ + if (GET_CODE (val_rtx) == MULT) + { + mult = INTVAL (XEXP (val_rtx, 1)); + val_rtx = XEXP (val_rtx, 0); + code = MULT; + } + + /* First check the "other operand". */ + if (other_op >= 0) + { + if (GET_MODE_SIZE (GET_MODE (ops[other_op])) > UNITS_PER_WORD) + return 0; + + /* Check if the lvalue register is the same as the "other + operand". If so, the result is undefined and we shouldn't do + this. FIXME: Check again. */ + if ((cris_base_p (ops[lreg], reload_in_progress || reload_completed) + && cris_base_p (ops[other_op], + reload_in_progress || reload_completed) + && REGNO (ops[lreg]) == REGNO (ops[other_op])) + || rtx_equal_p (ops[other_op], ops[lreg])) + return 0; + } + + /* Do not accept frame_pointer_rtx as any operand. */ + if (ops[lreg] == frame_pointer_rtx || ops[rreg] == frame_pointer_rtx + || ops[rval] == frame_pointer_rtx + || (other_op >= 0 && ops[other_op] == frame_pointer_rtx)) + return 0; + + if (code == PLUS + && ! 
cris_base_p (val_rtx, reload_in_progress || reload_completed)) + { + + /* Do not allow rx = rx + n if a normal add or sub with same size + would do. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + && CONST_INT_P (val_rtx) + && (INTVAL (val_rtx) <= 63 && INTVAL (val_rtx) >= -63)) + return 0; + + /* Check allowed cases, like [r(+)?].[bwd] and const. */ + if (CONSTANT_P (val_rtx)) + return 1; + + if (MEM_P (val_rtx) + && cris_base_or_autoincr_p (XEXP (val_rtx, 0), + reload_in_progress || reload_completed)) + return 1; + + if (GET_CODE (val_rtx) == SIGN_EXTEND + && MEM_P (XEXP (val_rtx, 0)) + && cris_base_or_autoincr_p (XEXP (XEXP (val_rtx, 0), 0), + reload_in_progress || reload_completed)) + return 1; + + /* If we got here, it's not a valid addressing mode. */ + return 0; + } + else if (code == MULT + || (code == PLUS + && cris_base_p (val_rtx, + reload_in_progress || reload_completed))) + { + /* Do not allow rx = rx + ry.S, since it doesn't give better code. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + || (mult == 1 && rtx_equal_p (ops[lreg], val_rtx))) + return 0; + + /* Do not allow bad multiply-values. */ + if (mult != 1 && mult != 2 && mult != 4) + return 0; + + /* Only allow r + ... */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + return 0; + + /* If we got here, all seems ok. + (All checks need to be done above). */ + return 1; + } + + /* If we get here, the caller got its initial tests wrong. */ + internal_error ("internal error: cris_side_effect_mode_ok with bad operands"); +} + +/* Whether next_cc0_user of insn is LE or GT or requires a real compare + insn for other reasons. */ + +bool +cris_cc0_user_requires_cmp (rtx insn) +{ + rtx cc0_user = NULL; + rtx body; + rtx set; + + gcc_assert (insn != NULL); + + if (!TARGET_V32) + return false; + + cc0_user = next_cc0_user (insn); + if (cc0_user == NULL) + return false; + + body = PATTERN (cc0_user); + set = single_set (cc0_user); + + /* Users can be sCC and bCC. */ + if (JUMP_P (cc0_user) + && GET_CODE (body) == SET + && SET_DEST (body) == pc_rtx + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE + && XEXP (XEXP (SET_SRC (body), 0), 0) == cc0_rtx) + { + return + GET_CODE (XEXP (SET_SRC (body), 0)) == GT + || GET_CODE (XEXP (SET_SRC (body), 0)) == LE; + } + else if (set) + { + return + GET_CODE (SET_SRC (body)) == GT + || GET_CODE (SET_SRC (body)) == LE; + } + + gcc_unreachable (); +} + +/* The function reg_overlap_mentioned_p in CVS (still as of 2001-05-16) + does not handle the case where the IN operand is strict_low_part; it + does handle it for X. Test-case in Axis-20010516. This function takes + care of that for THIS port. FIXME: strict_low_part is going away + anyway. */ + +static int +cris_reg_overlap_mentioned_p (rtx x, rtx in) +{ + /* The function reg_overlap_mentioned now handles when X is + strict_low_part, but not when IN is a STRICT_LOW_PART. */ + if (GET_CODE (in) == STRICT_LOW_PART) + in = XEXP (in, 0); + + return reg_overlap_mentioned_p (x, in); +} + +/* Return TRUE iff X is a CONST valid for e.g. indexing. + ANY_OPERAND is 0 if X is in a CALL_P insn or movsi, 1 + elsewhere. */ + +bool +cris_valid_pic_const (const_rtx x, bool any_operand) +{ + gcc_assert (flag_pic); + + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + return true; + default: + ; + } + + if (GET_CODE (x) != CONST) + return false; + + x = XEXP (x, 0); + + /* Handle (const (plus (unspec .. UNSPEC_GOTREL) (const_int ...))). 
*/ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (x, 1))) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + /* A PCREL operand is only valid for call and movsi. */ + case CRIS_UNSPEC_PLT_PCREL: + case CRIS_UNSPEC_PCREL: + return !any_operand; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_PLTGOTREAD: + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_GOTREL: + return true; + default: + gcc_unreachable (); + } + + return cris_pic_symbol_type_of (x) == cris_no_symbol; +} + +/* Helper function to find the right PIC-type symbol to generate, + given the original (non-PIC) representation. */ + +enum cris_pic_symbol_type +cris_pic_symbol_type_of (const_rtx x) +{ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + return SYMBOL_REF_LOCAL_P (x) + ? cris_rel_symbol : cris_got_symbol; + + case LABEL_REF: + return cris_rel_symbol; + + case CONST: + return cris_pic_symbol_type_of (XEXP (x, 0)); + + case PLUS: + case MINUS: + { + enum cris_pic_symbol_type t1 = cris_pic_symbol_type_of (XEXP (x, 0)); + enum cris_pic_symbol_type t2 = cris_pic_symbol_type_of (XEXP (x, 1)); + + gcc_assert (t1 == cris_no_symbol || t2 == cris_no_symbol); + + if (t1 == cris_got_symbol || t2 == cris_got_symbol) + return cris_got_symbol_needing_fixup; + + return t1 != cris_no_symbol ? t1 : t2; + } + + case CONST_INT: + case CONST_DOUBLE: + return cris_no_symbol; + + case UNSPEC: + /* Likely an offsettability-test attempting to add a constant to + a GOTREAD symbol, which can't be handled. */ + return cris_invalid_pic_symbol; + + default: + fatal_insn ("unrecognized supposed constant", x); + } + + gcc_unreachable (); +} + +/* The LEGITIMATE_PIC_OPERAND_P worker. */ + +int +cris_legitimate_pic_operand (rtx x) +{ + /* Symbols are not valid PIC operands as-is; just constants. */ + return cris_valid_pic_const (x, true); +} + +/* Queue an .ident string in the queue of top-level asm statements. + If the front-end is done, we must be being called from toplev.c. + In that case, do nothing. */ +void +cris_asm_output_ident (const char *string) +{ + if (cgraph_state != CGRAPH_STATE_PARSING) + return; + + default_asm_output_ident_directive (string); +} + +/* The ASM_OUTPUT_CASE_END worker. */ + +void +cris_asm_output_case_end (FILE *stream, int num, rtx table) +{ + /* Step back, over the label for the table, to the actual casejump and + assert that we find only what's expected. */ + rtx whole_jump_insn = prev_nonnote_nondebug_insn (table); + gcc_assert (whole_jump_insn != NULL_RTX && LABEL_P (whole_jump_insn)); + whole_jump_insn = prev_nonnote_nondebug_insn (whole_jump_insn); + gcc_assert (whole_jump_insn != NULL_RTX + && (JUMP_P (whole_jump_insn) + || (TARGET_V32 && INSN_P (whole_jump_insn) + && GET_CODE (PATTERN (whole_jump_insn)) == SEQUENCE))); + /* Get the pattern of the casejump, so we can extract the default label. */ + whole_jump_insn = PATTERN (whole_jump_insn); + + if (TARGET_V32) + { + /* This can be a SEQUENCE, meaning the delay-slot of the jump is + filled. We also output the offset word a little differently. */ + rtx parallel_jump + = (GET_CODE (whole_jump_insn) == SEQUENCE + ? PATTERN (XVECEXP (whole_jump_insn, 0, 0)) : whole_jump_insn); + + asm_fprintf (stream, + "\t.word %LL%d-.%s\n", + CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (XVECEXP + (parallel_jump, 0, 0), + 1), 2), 0)), + (TARGET_PDEBUG ? 
"; default" : "")); + return; + } + + asm_fprintf (stream, + "\t.word %LL%d-%LL%d%s\n", + CODE_LABEL_NUMBER (XEXP + (XEXP + (XEXP (XVECEXP (whole_jump_insn, 0, 0), 1), + 2), 0)), + num, + (TARGET_PDEBUG ? "; default" : "")); +} + +/* The TARGET_OPTION_OVERRIDE worker. + As is the norm, this also parses -mfoo=bar type parameters. */ + +static void +cris_option_override (void) +{ + if (cris_max_stackframe_str) + { + cris_max_stackframe = atoi (cris_max_stackframe_str); + + /* Do some sanity checking. */ + if (cris_max_stackframe < 0 || cris_max_stackframe > 0x20000000) + internal_error ("-max-stackframe=%d is not usable, not between 0 and %d", + cris_max_stackframe, 0x20000000); + } + + /* Let "-metrax4" and "-metrax100" change the cpu version. */ + if (TARGET_SVINTO && cris_cpu_version < CRIS_CPU_SVINTO) + cris_cpu_version = CRIS_CPU_SVINTO; + else if (TARGET_ETRAX4_ADD && cris_cpu_version < CRIS_CPU_ETRAX4) + cris_cpu_version = CRIS_CPU_ETRAX4; + + /* Parse -march=... and its synonym, the deprecated -mcpu=... */ + if (cris_cpu_str) + { + cris_cpu_version + = (*cris_cpu_str == 'v' ? atoi (cris_cpu_str + 1) : -1); + + if (strcmp ("etrax4", cris_cpu_str) == 0) + cris_cpu_version = 3; + + if (strcmp ("svinto", cris_cpu_str) == 0 + || strcmp ("etrax100", cris_cpu_str) == 0) + cris_cpu_version = 8; + + if (strcmp ("ng", cris_cpu_str) == 0 + || strcmp ("etrax100lx", cris_cpu_str) == 0) + cris_cpu_version = 10; + + if (cris_cpu_version < 0 || cris_cpu_version > 32) + error ("unknown CRIS version specification in -march= or -mcpu= : %s", + cris_cpu_str); + + /* Set the target flags. */ + if (cris_cpu_version >= CRIS_CPU_ETRAX4) + target_flags |= MASK_ETRAX4_ADD; + + /* If this is Svinto or higher, align for 32 bit accesses. */ + if (cris_cpu_version >= CRIS_CPU_SVINTO) + target_flags + |= (MASK_SVINTO | MASK_ALIGN_BY_32 + | MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN); + + /* Note that we do not add new flags when it can be completely + described with a macro that uses -mcpu=X. So + TARGET_HAS_MUL_INSNS is (cris_cpu_version >= CRIS_CPU_NG). */ + } + + if (cris_tune_str) + { + int cris_tune + = (*cris_tune_str == 'v' ? atoi (cris_tune_str + 1) : -1); + + if (strcmp ("etrax4", cris_tune_str) == 0) + cris_tune = 3; + + if (strcmp ("svinto", cris_tune_str) == 0 + || strcmp ("etrax100", cris_tune_str) == 0) + cris_tune = 8; + + if (strcmp ("ng", cris_tune_str) == 0 + || strcmp ("etrax100lx", cris_tune_str) == 0) + cris_tune = 10; + + if (cris_tune < 0 || cris_tune > 32) + error ("unknown CRIS cpu version specification in -mtune= : %s", + cris_tune_str); + + if (cris_tune >= CRIS_CPU_SVINTO) + /* We have currently nothing more to tune than alignment for + memory accesses. */ + target_flags + |= (MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN | MASK_ALIGN_BY_32); + } + + if (cris_cpu_version >= CRIS_CPU_V32) + target_flags &= ~(MASK_SIDE_EFFECT_PREFIXES|MASK_MUL_BUG); + + if (flag_pic) + { + /* Use error rather than warning, so invalid use is easily + detectable. Still change to the values we expect, to avoid + further errors. */ + if (! TARGET_LINUX) + { + error ("-fPIC and -fpic are not supported in this configuration"); + flag_pic = 0; + } + + /* Turn off function CSE. We need to have the addresses reach the + call expanders to get PLT-marked, as they could otherwise be + compared against zero directly or indirectly. After visiting the + call expanders they will then be cse:ed, as the call expanders + force_reg the addresses, effectively forcing flag_no_function_cse + to 0. 
*/ + flag_no_function_cse = 1; + } + + /* Set the per-function-data initializer. */ + init_machine_status = cris_init_machine_status; +} + +/* The TARGET_ASM_OUTPUT_MI_THUNK worker. */ + +static void +cris_asm_output_mi_thunk (FILE *stream, + tree thunkdecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree funcdecl) +{ + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), stream, 1); + + if (delta > 0) + fprintf (stream, "\tadd%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (delta), delta, + reg_names[CRIS_FIRST_ARG_REG]); + else if (delta < 0) + fprintf (stream, "\tsub%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (-delta), -delta, + reg_names[CRIS_FIRST_ARG_REG]); + + if (flag_pic) + { + const char *name = XSTR (XEXP (DECL_RTL (funcdecl), 0), 0); + + name = (* targetm.strip_name_encoding) (name); + + if (TARGET_V32) + { + fprintf (stream, "\tba "); + assemble_name (stream, name); + fprintf (stream, "%s\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + else + { + fprintf (stream, "add.d "); + assemble_name (stream, name); + fprintf (stream, "%s,$pc\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + } + else + { + fprintf (stream, "jump "); + assemble_name (stream, XSTR (XEXP (DECL_RTL (funcdecl), 0), 0)); + fprintf (stream, "\n"); + + if (TARGET_V32) + fprintf (stream, "\tnop\n"); + } + + final_end_function (); +} + +/* Boilerplate emitted at start of file. + + NO_APP *only at file start* means faster assembly. It also means + comments are not allowed. In some cases comments will be output + for debugging purposes. Make sure they are allowed then. */ +static void +cris_file_start (void) +{ + /* These expressions can vary at run time, so we cannot put + them into TARGET_INITIALIZER. */ + targetm.asm_file_start_app_off = !(TARGET_PDEBUG || flag_print_asm_name); + + default_file_start (); +} + +/* Output that goes at the end of the file, similarly. */ + +static void +cris_file_end (void) +{ + /* For CRIS, the default is to assume *no* executable stack, so output + an executable-stack-note only when needed. */ + if (TARGET_LINUX && trampolines_created) + file_end_indicate_exec_stack (); +} + +/* Rename the function calls for integer multiply and divide. */ +static void +cris_init_libfuncs (void) +{ + set_optab_libfunc (smul_optab, SImode, "__Mul"); + set_optab_libfunc (sdiv_optab, SImode, "__Div"); + set_optab_libfunc (udiv_optab, SImode, "__Udiv"); + set_optab_libfunc (smod_optab, SImode, "__Mod"); + set_optab_libfunc (umod_optab, SImode, "__Umod"); + + /* Atomic data being unaligned is unfortunately a reality. + Deal with it. */ + if (TARGET_ATOMICS_MAY_CALL_LIBFUNCS) + { + set_optab_libfunc (sync_compare_and_swap_optab, SImode, + "__cris_atcmpxchgr32"); + set_optab_libfunc (sync_compare_and_swap_optab, HImode, + "__cris_atcmpxchgr16"); + } +} + +/* The INIT_EXPANDERS worker sets the per-function-data initializer and + mark functions. */ + +void +cris_init_expanders (void) +{ + /* Nothing here at the moment. */ +} + +/* Zero initialization is OK for all current fields. */ + +static struct machine_function * +cris_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Split a 2 word move (DI or presumably DF) into component parts. + Originally a copy of gen_split_move_double in m32r.c. 
*/ + +rtx +cris_split_movdx (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + /* We used to have to handle (SUBREG (MEM)) here, but that should no + longer happen; after reload there are no SUBREGs any more, and we're + only called after reload. */ + CRIS_ASSERT (GET_CODE (dest) != SUBREG && GET_CODE (src) != SUBREG); + + start_sequence (); + if (REG_P (dest)) + { + int dregno = REGNO (dest); + + /* Reg-to-reg copy. */ + if (REG_P (src)) + { + int sregno = REGNO (src); + + int reverse = (dregno == sregno + 1); + + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + /* Constant-to-reg copy. */ + else if (CONST_INT_P (src) || GET_CODE (src) == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + /* Mem-to-reg copy. */ + else if (MEM_P (src)) + { + /* If the high-address word is used in the address, we must load it + last. Otherwise, load it first. */ + rtx addr = XEXP (src, 0); + int reverse + = (refers_to_regno_p (dregno, dregno + 1, addr, NULL) != 0); + + /* The original code implies that we can't do + move.x [rN+],rM move.x [rN],rM+1 + when rN is dead, because of REG_NOTES damage. That is + consistent with what I've seen, so don't try it. + + We have two different cases here; if the addr is POST_INC, + just pass it through, otherwise add constants. */ + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented + addresses ourselves, we must add a post-inc note + manually. */ + mem = change_address (src, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with + embedded postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your + safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (Pmode, addr, + reverse * UNITS_PER_WORD)))); + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, ! reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (Pmode, addr, + (! reverse) * + UNITS_PER_WORD)))); + } + } + else + internal_error ("unknown src"); + } + /* Reg-to-mem copy or clear mem. 
*/ + else if (MEM_P (dest) + && (REG_P (src) + || src == const0_rtx + || src == CONST0_RTX (DFmode))) + { + rtx addr = XEXP (dest, 0); + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + mem = change_address (dest, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + mem, operand_subword (src, 0, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + mem, + operand_subword (src, 1, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with embedded + postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, addr), + operand_subword (src, 0, TRUE, mode))); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, + plus_constant (Pmode, addr, + UNITS_PER_WORD)), + operand_subword (src, 1, TRUE, mode))); + } + } + + else + internal_error ("unknown dest"); + + val = get_insns (); + end_sequence (); + return val; +} + +/* The expander for the prologue pattern name. */ + +void +cris_expand_prologue (void) +{ + int regno; + int size = get_frame_size (); + /* Shorten the used name for readability. */ + int cfoa_size = crtl->outgoing_args_size; + int last_movem_reg = -1; + int framesize = 0; + rtx mem, insn; + int return_address_on_stack = cris_return_address_on_stack (); + int got_really_used = false; + int n_movem_regs = 0; + int pretend = crtl->args.pretend_args_size; + + /* Don't do anything if no prologues or epilogues are wanted. */ + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + CRIS_ASSERT (size >= 0); + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align the size to what's best for the CPU model. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + if (pretend) + { + /* See also cris_setup_incoming_varargs where + cfun->machine->stdarg_regs is set. There are other setters of + crtl->args.pretend_args_size than stdarg handling, like + for an argument passed with parts in R13 and stack. We must + not store R13 into the pretend-area for that case, as GCC does + that itself. "Our" store would be marked as redundant and GCC + will attempt to remove it, which will then be flagged as an + internal error; trying to remove a frame-related insn. 
*/ + int stdarg_regs = cfun->machine->stdarg_regs; + + framesize += pretend; + + for (regno = CRIS_FIRST_ARG_REG + CRIS_MAX_ARGS_IN_REGS - 1; + stdarg_regs > 0; + regno--, pretend -= 4, stdarg_regs--) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -4))); + /* FIXME: When dwarf2 frame output and unless asynchronous + exceptions, make dwarf2 bundle together all stack + adjustments like it does for registers between stack + adjustments. */ + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_varargs_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + + /* Note the absence of RTX_FRAME_RELATED_P on the above insn: + the value isn't restored, so we don't want to tell dwarf2 + that it's been stored to stack, else EH handling info would + get confused. */ + } + + /* For other setters of crtl->args.pretend_args_size, we + just adjust the stack by leaving the remaining size in + "pretend", handled below. */ + } + + /* Save SRP if not a leaf function. */ + if (return_address_on_stack) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM)); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += 4; + } + + /* Set up the frame pointer, if needed. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4; + } + + /* Between frame-pointer and saved registers lie the area for local + variables. If we get here with "pretended" size remaining, count + it into the general stack size. */ + size += pretend; + + /* Get a contiguous sequence of registers, starting with R0, that need + to be saved. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + /* Check if movem may be used for registers so far. */ + if (regno == last_movem_reg + 1) + /* Yes, update next expected register. */ + last_movem_reg = regno; + else + { + /* We cannot use movem for all registers. We have to flush + any movem:ed registers we got so far. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* It is a win to use a side-effect assignment for + 64 <= size <= 128. But side-effect on movem was + not usable for CRIS v0..3. Also only do it if + side-effects insns are allowed. 
*/ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), + true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + 0, true); + } + + framesize += n_saved * 4 + size; + last_movem_reg = -1; + size = 0; + } + + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -4 - size))); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4 + size; + size = 0; + } + } + } + + /* Check after, if we could movem all registers. This is the normal case. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* Side-effect on movem was not usable for CRIS v0..3. Also only + do it if side-effects insns are allowed. */ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), 0, true); + } + + framesize += n_saved * 4 + size; + /* We have to put outgoing argument space after regs. */ + if (cfoa_size) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -cfoa_size))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += cfoa_size; + } + } + else if ((size + cfoa_size) > 0) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -(cfoa_size + size)))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += size + cfoa_size; + } + + /* Set up the PIC register, if it is used. */ + if (got_really_used) + { + rtx got + = gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), CRIS_UNSPEC_GOT); + emit_move_insn (pic_offset_table_rtx, got); + + /* FIXME: This is a cover-up for flow2 messing up; it doesn't + follow exceptional paths and tries to delete the GOT load as + unused, if it isn't used on the non-exceptional paths. Other + ports have similar or other cover-ups, or plain bugs marking + the GOT register load as maybe-dead. 
To see this, remove the + line below and try libsupc++/vec.cc or a trivial + "static void y (); void x () {try {y ();} catch (...) {}}". */ + emit_use (pic_offset_table_rtx); + } + + if (cris_max_stackframe && framesize > cris_max_stackframe) + warning (0, "stackframe too big: %d bytes", framesize); +} + +/* The expander for the epilogue pattern. */ + +void +cris_expand_epilogue (void) +{ + int regno; + int size = get_frame_size (); + int last_movem_reg = -1; + int argspace_offset = crtl->outgoing_args_size; + int pretend = crtl->args.pretend_args_size; + rtx mem; + bool return_address_on_stack = cris_return_address_on_stack (); + /* A reference may have been optimized out + (like the abort () in fde_split in unwind-dw2-fde.c, at least 3.2.1) + so check that it's still used. */ + int got_really_used = false; + int n_movem_regs = 0; + + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align byte count of stack frame. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + /* Check how many saved regs we can movem. They start at r0 and must + be contiguous. */ + for (regno = 0; + regno < FIRST_PSEUDO_REGISTER; + regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + if (regno == last_movem_reg + 1) + last_movem_reg = regno; + else + break; + } + + /* If there was only one register that really needed to be saved + through movem, don't use movem. */ + if (n_movem_regs == 1) + last_movem_reg = -1; + + /* Now emit "normal" move insns for all regs higher than the movem + regs. */ + for (regno = FIRST_PSEUDO_REGISTER - 1; + regno > last_movem_reg; + regno--) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + rtx insn; + + if (argspace_offset) + { + /* There is an area for outgoing parameters located before + the saved registers. We have to adjust for that. */ + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + argspace_offset))); + /* Make sure we only do this once. */ + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (gen_rtx_raw_REG (SImode, regno), mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we have any movem-restore, do it now. */ + if (last_movem_reg != -1) + { + rtx insn; + + if (argspace_offset) + { + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + argspace_offset))); + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = emit_insn (cris_gen_movem_load (mem, + GEN_INT (last_movem_reg + 1), 0)); + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. 
*/ + if (side_effects_p (PATTERN (insn))) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we don't clobber all of the allocated stack area (we've already + deallocated saved registers), GCC might want to schedule loads from + the stack to *after* the stack-pointer restore, which introduces an + interrupt race condition. This happened for the initial-value + SRP-restore for g++.dg/eh/registers1.C (noticed by inspection of + other failure for that test). It also happened for the stack slot + for the return value in (one version of) + linux/fs/dcache.c:__d_lookup, at least with "-O2 + -fno-omit-frame-pointer". */ + + /* Restore frame pointer if necessary. */ + if (frame_pointer_needed) + { + rtx insn; + + emit_insn (gen_cris_frame_deallocated_barrier ()); + + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (frame_pointer_rtx, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + else if ((size + argspace_offset) != 0) + { + emit_insn (gen_cris_frame_deallocated_barrier ()); + + /* If there was no frame-pointer to restore sp from, we must + explicitly deallocate local variables. */ + + /* Handle space for outgoing parameters that hasn't been handled + yet. */ + size += argspace_offset; + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, size))); + } + + /* If this function has no pushed register parameters + (stdargs/varargs), and if it is not a leaf function, then we have + the return address on the stack. */ + if (return_address_on_stack && pretend == 0) + { + if (TARGET_V32 || crtl->calls_eh_return) + { + rtx mem; + rtx insn; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); + } + else + cris_expand_return (true); + + return; + } + + /* If we pushed some register parameters, then adjust the stack for + them. */ + if (pretend != 0) + { + /* If SRP is stored on the way, we need to restore it first. */ + if (return_address_on_stack) + { + rtx mem; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + rtx insn; + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + pretend))); + } + + /* Perform the "physical" unwinding that the EH machinery calculated. 
*/ + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); +} + +/* Worker function for generating movem from mem for load_multiple. */ + +rtx +cris_gen_movem_load (rtx src, rtx nregs_rtx, int nprefix) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx srcreg = XEXP (src, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (srcreg) == POST_INC) + srcreg = XEXP (srcreg, 0); + + CRIS_ASSERT (REG_P (srcreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for their use. */ + if (nregs == 1) + return gen_movsi (gen_rtx_REG (SImode, 0), src); + + vec = rtvec_alloc (nprefix + nregs + + (GET_CODE (XEXP (src, 0)) == POST_INC)); + + if (GET_CODE (XEXP (src, 0)) == POST_INC) + { + RTVEC_ELT (vec, nprefix + 1) + = gen_rtx_SET (VOIDmode, srcreg, + plus_constant (Pmode, srcreg, nregs * 4)); + eltno++; + } + + src = replace_equiv_address (src, srcreg); + RTVEC_ELT (vec, nprefix) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), src); + regno += regno_inc; + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, nprefix + eltno) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), + adjust_address_nv (src, SImode, i * 4)); + regno += regno_inc; + } + + return gen_rtx_PARALLEL (VOIDmode, vec); +} + +/* Worker function for generating movem to mem. If FRAME_RELATED, notes + are added that the dwarf2 machinery understands. */ + +rtx +cris_emit_movem_store (rtx dest, rtx nregs_rtx, int increment, + bool frame_related) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx insn; + rtx destreg = XEXP (dest, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (destreg) == POST_INC) + increment += nregs * 4; + + if (GET_CODE (destreg) == POST_INC || GET_CODE (destreg) == PLUS) + destreg = XEXP (destreg, 0); + + CRIS_ASSERT (REG_P (destreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for use. */ + if (nregs == 1) + { + rtx mov = gen_rtx_SET (VOIDmode, dest, gen_rtx_REG (SImode, 0)); + + if (increment == 0) + { + insn = emit_insn (mov); + if (frame_related) + RTX_FRAME_RELATED_P (insn) = 1; + return insn; + } + + /* If there was a request for a side-effect, create the ordinary + parallel. */ + vec = rtvec_alloc (2); + + RTVEC_ELT (vec, 0) = mov; + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, destreg, + plus_constant (Pmode, destreg, + increment)); + if (frame_related) + { + RTX_FRAME_RELATED_P (mov) = 1; + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + } + } + else + { + vec = rtvec_alloc (nregs + (increment != 0 ? 1 : 0)); + RTVEC_ELT (vec, 0) + = gen_rtx_SET (VOIDmode, + replace_equiv_address (dest, + plus_constant (Pmode, destreg, + increment)), + gen_rtx_REG (SImode, regno)); + regno += regno_inc; + + /* The dwarf2 info wants this mark on each component in a parallel + that's part of the prologue (though it's optional on the first + component). 
*/ + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 0)) = 1; + + if (increment != 0) + { + RTVEC_ELT (vec, 1) + = gen_rtx_SET (VOIDmode, destreg, + plus_constant (Pmode, destreg, + increment != 0 + ? increment : nregs * 4)); + eltno++; + + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + + /* Don't call adjust_address_nv on a post-incremented address if + we can help it. */ + if (GET_CODE (XEXP (dest, 0)) == POST_INC) + dest = replace_equiv_address (dest, destreg); + } + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, eltno) + = gen_rtx_SET (VOIDmode, adjust_address_nv (dest, SImode, i * 4), + gen_rtx_REG (SImode, regno)); + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, eltno)) = 1; + regno += regno_inc; + } + } + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec)); + + /* Because dwarf2out.c handles the insns in a parallel as a sequence, + we need to keep the stack adjustment separate, after the + MEM-setters. Else the stack-adjustment in the second component of + the parallel would be mishandled; the offsets for the SETs that + follow it would be wrong. We prepare for this by adding a + REG_FRAME_RELATED_EXPR with the MEM-setting parts in a SEQUENCE + followed by the increment. Note that we have FRAME_RELATED_P on + all the SETs, including the original stack adjustment SET in the + parallel. */ + if (frame_related) + { + if (increment != 0) + { + rtx seq = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (nregs + 1)); + XVECEXP (seq, 0, 0) = copy_rtx (XVECEXP (PATTERN (insn), 0, 0)); + for (i = 1; i < nregs; i++) + XVECEXP (seq, 0, i) + = copy_rtx (XVECEXP (PATTERN (insn), 0, i + 1)); + XVECEXP (seq, 0, nregs) = copy_rtx (XVECEXP (PATTERN (insn), 0, 1)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, seq); + } + + RTX_FRAME_RELATED_P (insn) = 1; + } + + return insn; +} + +/* Worker function for expanding the address for PIC function calls. */ + +void +cris_expand_pic_call_address (rtx *opp) +{ + rtx op = *opp; + + gcc_assert (MEM_P (op)); + op = XEXP (op, 0); + + /* It might be that code can be generated that jumps to 0 (or to a + specific address). Don't die on that. (There is a + testcase.) */ + if (CONSTANT_ADDRESS_P (op) && !CONST_INT_P (op)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (op); + + CRIS_ASSERT (can_create_pseudo_p ()); + + /* For local symbols (non-PLT), just get the plain symbol + reference into a register. For symbols that can be PLT, make + them PLT. */ + if (t == cris_rel_symbol) + { + /* For v32, we're fine as-is; just PICify the symbol. Forcing + into a register caused performance regression for 3.2.1, + observable in __floatdidf and elsewhere in libgcc. */ + if (TARGET_V32) + { + rtx sym = GET_CODE (op) != CONST ? op : get_related_value (op); + HOST_WIDE_INT offs = get_integer_term (op); + + /* We can't get calls to sym+N, N integer, can we? */ + gcc_assert (offs == 0); + + op = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL)); + } + else + op = force_reg (Pmode, op); + } + else if (t == cris_got_symbol) + { + if (TARGET_AVOID_GOTPLT) + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_GOTREL)),rM" + "add.d rPIC,rM,rO", "jsr rO" for pre-v32 and + "jsr (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_PCREL))" + for v32. */ + rtx tem, rm, ro; + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + TARGET_V32 + ? 
CRIS_UNSPEC_PLT_PCREL + : CRIS_UNSPEC_PLT_GOTREL); + tem = gen_rtx_CONST (Pmode, tem); + if (TARGET_V32) + op = tem; + else + { + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, tem); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + op = ro; + } + } + else + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_PLTGOTREAD)),rM" + "add.d rPIC,rM,rO" "jsr [rO]" with the memory access + marked as not trapping and not aliasing. No "move.d + [rO],rP" as that would invite to re-use of a value + that should not be reused. FIXME: Need a peephole2 + for cases when this is cse:d from the call, to change + back to just get the PLT entry address, so we don't + resolve the same symbol over and over (the memory + access of the PLTGOT isn't constant). */ + rtx tem, mem, rm, ro; + + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + CRIS_UNSPEC_PLTGOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it aliases + other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + op = mem; + } + } + else + /* Can't possibly get a GOT-needing-fixup for a function-call, + right? */ + fatal_insn ("unidentifiable call op", op); + + *opp = replace_equiv_address (*opp, op); + } +} + +/* Make sure operands are in the right order for an addsi3 insn as + generated by a define_split. Nothing but REG_P as the first + operand is recognized by addsi3 after reload. OPERANDS contains + the operands, with the first at OPERANDS[N] and the second at + OPERANDS[N+1]. */ + +void +cris_order_for_addsi3 (rtx *operands, int n) +{ + if (!REG_P (operands[n])) + { + rtx tem = operands[n]; + operands[n] = operands[n + 1]; + operands[n + 1] = tem; + } +} + +/* Use from within code, from e.g. PRINT_OPERAND and + PRINT_OPERAND_ADDRESS. Macros used in output_addr_const need to emit + different things depending on whether code operand or constant is + emitted. */ + +static void +cris_output_addr_const (FILE *file, rtx x) +{ + in_code++; + output_addr_const (file, x); + in_code--; +} + +/* Worker function for ASM_OUTPUT_SYMBOL_REF. */ + +void +cris_asm_output_symbol_ref (FILE *file, rtx x) +{ + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + if (flag_pic && in_code > 0) + { + const char *origstr = XSTR (x, 0); + const char *str; + str = (* targetm.strip_name_encoding) (origstr); + assemble_name (file, str); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + output_operand_lossage ("PIC register isn't set up"); + } + else + assemble_name (file, XSTR (x, 0)); +} + +/* Worker function for ASM_OUTPUT_LABEL_REF. */ + +void +cris_asm_output_label_ref (FILE *file, char *buf) +{ + if (flag_pic && in_code > 0) + { + assemble_name (file, buf); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + internal_error ("emitting PIC operand, but PIC register " + "isn%'t set up"); + } + else + assemble_name (file, buf); +} + +/* Worker function for TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ + +static bool +cris_output_addr_const_extra (FILE *file, rtx xconst) +{ + switch (GET_CODE (xconst)) + { + rtx x; + + case UNSPEC: + x = XVECEXP (xconst, 0, 0); + CRIS_ASSERT (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST); + output_addr_const (file, x); + switch (XINT (xconst, 1)) + { + case CRIS_UNSPEC_PCREL: + /* We only get this with -fpic/PIC to tell it apart from an + invalid symbol. We can't tell here, but it should only + be the operand of a call or movsi. */ + gcc_assert (TARGET_V32 && flag_pic); + break; + + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + fprintf (file, ":PLT"); + break; + + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":PLTG"); + break; + + case CRIS_UNSPEC_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":GOTOFF"); + break; + + case CRIS_UNSPEC_GOTREAD: + if (flag_pic == 1) + fprintf (file, ":GOT16"); + else + fprintf (file, ":GOT"); + break; + + case CRIS_UNSPEC_PLTGOTREAD: + if (flag_pic == 1) + fprintf (file, CRIS_GOTPLT_SUFFIX "16"); + else + fprintf (file, CRIS_GOTPLT_SUFFIX); + break; + + default: + gcc_unreachable (); + } + return true; + + default: + return false; + } +} + +/* Worker function for TARGET_STRUCT_VALUE_RTX. */ + +static rtx +cris_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, CRIS_STRUCT_VALUE_REGNUM); +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +static void +cris_setup_incoming_varargs (cumulative_args_t ca_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + int *pretend_arg_size, + int second_time) +{ + CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + if (ca->regs < CRIS_MAX_ARGS_IN_REGS) + { + int stdarg_regs = CRIS_MAX_ARGS_IN_REGS - ca->regs; + cfun->machine->stdarg_regs = stdarg_regs; + *pretend_arg_size = stdarg_regs * 4; + } + + if (TARGET_PDEBUG) + fprintf (asm_out_file, + "\n; VA:: ANSI: %d args before, anon @ #%d, %dtime\n", + ca->regs, *pretend_arg_size, second_time); +} + +/* Return true if TYPE must be passed by invisible reference. + For cris, we pass <= 8 bytes by value, others by reference. */ + +static bool +cris_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + return (targetm.calls.must_pass_in_stack (mode, type) + || CRIS_FUNCTION_ARG_SIZE (mode, type) > 8); +} + +/* A combination of defining TARGET_PROMOTE_FUNCTION_MODE, promoting arguments + and *not* defining TARGET_PROMOTE_PROTOTYPES or PROMOTE_MODE gives the + best code size and speed for gcc, ipps and products in gcc-2.7.2. */ + +enum machine_mode +cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return) +{ + /* Defining PROMOTE_FUNCTION_RETURN in gcc-2.7.2 uncovered bug 981110 (even + when modifying TARGET_FUNCTION_VALUE to return the promoted mode). + Maybe pointless as of now, but let's keep the old behavior. */ + if (for_return == 1) + return mode; + return CRIS_PROMOTED_MODE (mode, *punsignedp, type); +} + +/* Atomic types require alignment to be at least their "natural" size. */ + +static unsigned int +cris_atomic_align_for_mode (enum machine_mode mode) +{ + return GET_MODE_BITSIZE (mode); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. 
*/ + +static rtx +cris_function_value(const_tree type, + const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (type), CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +static rtx +cris_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +static bool +cris_function_value_regno_p (const unsigned int regno) +{ + return (regno == CRIS_FIRST_ARG_REG); +} + +static int +cris_arg_partial_bytes (cumulative_args_t ca, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + if (get_cumulative_args (ca)->regs == CRIS_MAX_ARGS_IN_REGS - 1 + && !targetm.calls.must_pass_in_stack (mode, type) + && CRIS_FUNCTION_ARG_SIZE (mode, type) > 4 + && CRIS_FUNCTION_ARG_SIZE (mode, type) <= 8) + return UNITS_PER_WORD; + else + return 0; +} + +static rtx +cris_function_arg_1 (cumulative_args_t ca_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named, bool incoming) +{ + const CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + if ((!incoming || named) && ca->regs < CRIS_MAX_ARGS_IN_REGS) + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG + ca->regs); + else + return NULL_RTX; +} + +/* Worker function for TARGET_FUNCTION_ARG. + The void_type_node is sent as a "closing" call. */ + +static rtx +cris_function_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, false); +} + +/* Worker function for TARGET_FUNCTION_INCOMING_ARG. + + The differences between this and the previous, is that this one checks + that an argument is named, since incoming stdarg/varargs arguments are + pushed onto the stack, and we don't have to check against the "closing" + void_type_node TYPE parameter. */ + +static rtx +cris_function_incoming_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, true); +} + +/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +cris_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + ca->regs += (3 + CRIS_FUNCTION_ARG_SIZE (mode, type)) / 4; +} + +/* Worker function for TARGET_MD_ASM_CLOBBERS. */ + +static tree +cris_md_asm_clobbers (tree outputs, tree inputs, tree in_clobbers) +{ + HARD_REG_SET mof_set; + tree clobbers; + tree t; + + CLEAR_HARD_REG_SET (mof_set); + SET_HARD_REG_BIT (mof_set, CRIS_MOF_REGNUM); + + /* For the time being, all asms clobber condition codes. Revisit when + there's a reasonable use for inputs/outputs that mention condition + codes. */ + clobbers + = tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_CC0_REGNUM]), + reg_names[CRIS_CC0_REGNUM]), + in_clobbers); + + for (t = outputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + /* The constraint letter for the singleton register class of MOF + is 'h'. If it's mentioned in the constraints, the asm is + MOF-aware and adding it to the clobbers would cause it to have + impossible constraints. 
*/ + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + for (t = inputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + return tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_MOF_REGNUM]), + reg_names[CRIS_MOF_REGNUM]), + clobbers); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. + + Really only needed if the stack frame has variable length (alloca + or variable sized local arguments (GNU C extension). See PR39499 and + PR38609 for the reason this isn't just 0. */ + +bool +cris_frame_pointer_required (void) +{ + return !crtl->sp_is_unchanging; +} + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + + This looks too complicated, and it is. I assigned r7 to be the + static chain register, but it is call-saved, so we have to save it, + and come back to restore it after the call, so we have to save srp... + Anyway, trampolines are rare enough that we can cope with this + somewhat lack of elegance. + (Do not be tempted to "straighten up" whitespace in the asms; the + assembler #NO_APP state mandates strict spacing). */ +/* ??? See the i386 regparm=3 implementation that pushes the static + chain value to the stack in the trampoline, and uses a call-saved + register when called directly. */ + +static void +cris_asm_trampoline_template (FILE *f) +{ + if (TARGET_V32) + { + /* This normally-unused nop insn acts as an instruction to + the simulator to flush its instruction cache. None of + the other instructions in the trampoline template suits + as a trigger for V32. The pc-relative addressing mode + works nicely as a trigger for V10. + FIXME: Have specific V32 template (possibly avoiding the + use of a special instruction). */ + fprintf (f, "\tclearf x\n"); + /* We have to use a register as an intermediate, choosing + semi-randomly R1 (which has to not be the STATIC_CHAIN_REGNUM), + so we can use it for address indirection and jsr target. */ + fprintf (f, "\tmove $r1,$mof\n"); + /* +4 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tmove.d $%s,[$r1]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $mof,[$r1]\n"); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $srp,[$r1]\n"); + /* +20 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +26 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tjsr $r1\n"); + fprintf (f, "\tsetf\n"); + /* +36 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +42 */ + fprintf (f, "\tmove.d 0,$r1\n"); + /* +48 */ + fprintf (f, "\tmove.d 0,$r9\n"); + fprintf (f, "\tjump $r9\n"); + fprintf (f, "\tsetf\n"); + } + else + { + fprintf (f, "\tmove.d $%s,[$pc+20]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tmove $srp,[$pc+22]\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjsr 0\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjump 0\n"); + } +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
*/ + +static void +cris_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx tramp = XEXP (m_tramp, 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_V32) + { + mem = adjust_address (m_tramp, SImode, 6); + emit_move_insn (mem, plus_constant (Pmode, tramp, 38)); + mem = adjust_address (m_tramp, SImode, 22); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 28); + emit_move_insn (mem, fnaddr); + } + else + { + mem = adjust_address (m_tramp, SImode, 10); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 16); + emit_move_insn (mem, fnaddr); + } + + /* Note that there is no need to do anything with the cache for + sake of a trampoline. */ +} + + +#if 0 +/* Various small functions to replace macros. Only called from a + debugger. They might collide with gcc functions or system functions, + so only emit them when '#if 1' above. */ + +enum rtx_code Get_code (rtx); + +enum rtx_code +Get_code (rtx x) +{ + return GET_CODE (x); +} + +const char *Get_mode (rtx); + +const char * +Get_mode (rtx x) +{ + return GET_MODE_NAME (GET_MODE (x)); +} + +rtx Xexp (rtx, int); + +rtx +Xexp (rtx x, int n) +{ + return XEXP (x, n); +} + +rtx Xvecexp (rtx, int, int); + +rtx +Xvecexp (rtx x, int n, int m) +{ + return XVECEXP (x, n, m); +} + +int Get_rtx_len (rtx); + +int +Get_rtx_len (rtx x) +{ + return GET_RTX_LENGTH (GET_CODE (x)); +} + +/* Use upper-case to distinguish from local variables that are sometimes + called next_insn and prev_insn. */ + +rtx Next_insn (rtx); + +rtx +Next_insn (rtx insn) +{ + return NEXT_INSN (insn); +} + +rtx Prev_insn (rtx); + +rtx +Prev_insn (rtx insn) +{ + return PREV_INSN (insn); +} +#endif + +#include "gt-cris.h" + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/cris.h b/gcc-4.9/gcc/config/cris/cris.h new file mode 100644 index 000000000..37b562e5d --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.h @@ -0,0 +1,1081 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. 
Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* Note that other header files (e.g. config/elfos.h, config/linux.h, + and config/cris/linux.h) are responsible for lots of settings not + repeated below. This file contains general CRIS definitions + and definitions for the cris-*-elf subtarget. */ + +/* We don't want to use gcc_assert for everything, as that can be + compiled out. */ +#define CRIS_ASSERT(x) \ + do { if (!(x)) internal_error ("CRIS-port assertion failed: " #x); } while (0) + +/* Replacement for REG_P since it does not match SUBREGs. Happens for + testcase Axis-20000320 with gcc-2.9x. */ +#define REG_S_P(x) \ + (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (XEXP (x, 0)))) + +/* Last register in main register bank r0..r15. */ +#define CRIS_LAST_GENERAL_REGISTER 15 + +/* Descriptions of registers used for arguments. */ +#define CRIS_FIRST_ARG_REG 10 +#define CRIS_MAX_ARGS_IN_REGS 4 + +/* See also *_REGNUM constants in cris.md. */ + +/* Most of the time, we need the index into the register-names array. + When passing debug-info, we need the real hardware register number. */ +#define CRIS_CANONICAL_SRP_REGNUM (16 + 11) +#define CRIS_CANONICAL_MOF_REGNUM (16 + 7) +/* We have CCR in all models including v10, but that's 16 bits, so let's + prefer the DCCR number, which is a DMA pointer in pre-v8, so we'll + never clash with it for GCC purposes. */ +#define CRIS_CANONICAL_CC0_REGNUM (16 + 13) + +/* When generating PIC, these suffixes are added to the names of non-local + functions when being output. Contrary to other ports, we have offsets + relative to the GOT, not the PC. We might implement PC-relative PLT + semantics later for the general case; they are used in some cases right + now, such as MI thunks. */ +#define CRIS_GOTPLT_SUFFIX ":GOTPLT" +#define CRIS_PLT_GOTOFFSET_SUFFIX ":PLTG" +#define CRIS_PLT_PCOFFSET_SUFFIX ":PLT" + +#define CRIS_FUNCTION_ARG_SIZE(MODE, TYPE) \ + ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) \ + : (unsigned) int_size_in_bytes (TYPE)) + +/* Which CPU version this is. The parsed and adjusted cris_cpu_str. */ +extern int cris_cpu_version; + +/* Changing the order used to be necessary to put the fourth __make_dp + argument (a DImode parameter) in registers, to fit with the libfunc + parameter passing scheme used for intrinsic functions. FIXME: Check + performance. */ +#ifdef IN_LIBGCC2 +#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c) +#endif + + +/* Node: Driver */ + +/* Also provide canonical vN definitions when user specifies an alias. 
*/ + +#define CPP_SPEC \ + "%{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}\ + %{mtune=etrax4:-D__tune_v3 -D__CRIS_arch_tune=3}\ + %{mtune=etrax100:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=svinto:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=etrax100lx:-D__tune_v10 -D__CRIS_arch_tune=10}\ + %{mtune=ng:-D__tune_v10 -D__CRIS_arch_tune=10}}\ + %{mcpu=*:-D__arch_%* %{mcpu=v*:-D__CRIS_arch_version=%*}\ + %{mcpu=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{mcpu=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{mcpu=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{march=*:-D__arch_%* %{march=v*:-D__CRIS_arch_version=%*}\ + %{march=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{march=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{march=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{metrax100:-D__arch__v8 -D__CRIS_arch_version=8}\ + %{metrax4:-D__arch__v3 -D__CRIS_arch_version=3}\ + %(cpp_subtarget)" + +/* For the cris-*-elf subtarget. */ + +#define CRIS_DEFAULT_TUNE "10" +#define CRIS_ARCH_CPP_DEFAULT +#define CRIS_DEFAULT_ASM_ARCH_OPTION "" + +#ifdef TARGET_CPU_DEFAULT +#if TARGET_CPU_DEFAULT != 32 && TARGET_CPU_DEFAULT != 10 + #error "Due to '()'; e.g. '#define TARGET_CPU_DEFAULT (10)', stringize TARGET_CPU_DEFAULT isn't useful: update manually." +#endif + +#if TARGET_CPU_DEFAULT == 32 +#undef CRIS_DEFAULT_TUNE +#define CRIS_DEFAULT_TUNE "32" +/* To enable use of "generic" cris-axis-elf binutils, always pass the + architecture option to GAS. (We don't do this for non-v32.) */ +#undef CRIS_DEFAULT_ASM_ARCH_OPTION +#define CRIS_DEFAULT_ASM_ARCH_OPTION "--march=v32" +#endif + +#undef CRIS_ARCH_CPP_DEFAULT +#define CRIS_ARCH_CPP_DEFAULT \ + "%{!march=*:\ + %{!metrax*:\ + %{!mcpu=*:\ + %{!mtune=*:-D__tune_v" CRIS_DEFAULT_TUNE "}\ + -D__arch_v"CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_version=" CRIS_DEFAULT_TUNE "}}}" +#endif + +#define CRIS_CPP_SUBTARGET_SPEC \ + "%{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!metrax*:%{!mcpu=*:\ + -D__tune_v" CRIS_DEFAULT_TUNE \ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}}}}"\ + CRIS_ARCH_CPP_DEFAULT + +/* Override previous definitions (../linux.h). */ +#undef CC1_SPEC +#define CC1_SPEC \ + "%{metrax4:-march=v3}\ + %{metrax100:-march=v8}\ + %{march=*:-march=%*}\ + %{mcpu=*:-mcpu=%*}\ + %(cc1_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_CC1_SUBTARGET_SPEC \ + "-melf\ + %{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!mcpu=*:-mtune=v" CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}\ + %{!finhibit-size-directive:\ + %{!fno-function-sections: -ffunction-sections}\ + %{!fno-data-sections: -fdata-sections}}}}" + +/* This adds to CC1_SPEC. */ +#define CC1PLUS_SPEC "" + +#ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION +#define MAYBE_AS_NO_MUL_BUG_ABORT \ + "%{mno-mul-bug-workaround:-no-mul-bug-abort} " +#else +#define MAYBE_AS_NO_MUL_BUG_ABORT +#endif + +/* Override previous definitions (../linux.h). */ +#undef ASM_SPEC +#define ASM_SPEC \ + MAYBE_AS_NO_MUL_BUG_ABORT \ + "%(asm_subtarget)\ + %{march=*:%{mcpu=*:%edo not specify both -march=... and -mcpu=...}}\ + %{march=v0|mcpu=v0|march=v3|mcpu=v3|march=v8|mcpu=v8:--march=v0_v10}\ + %{march=v10|mcpu=v10:--march=v10}\ + %{march=v32|mcpu=v32:--march=v32}" + +/* For the cris-*-elf subtarget. 
*/ +#define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}" + +/* FIXME: We should propagate the -melf option to make the criself + "emulation" unless a linker script is provided (-T*), but I don't know + how to do that if either of -Ttext, -Tdata or -Tbss is given but no + linker script, as is usually the case. Leave it to the user for the + time being. */ +#undef LINK_SPEC +#define LINK_SPEC \ + "%{v:--verbose}\ + %(link_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcriself\ + %{sim2:%{!T*:-Tdata 0x4000000 -Tbss 0x8000000}}\ + %{!r:%{O2|O3: --gc-sections}}" + +/* Which library to get. The simulator uses a different library for + the low-level syscalls (implementing the Linux syscall ABI instead + of direct-iron accesses). Default everything with the stub "nosys" + library. */ +/* Override previous definitions (linux.h). */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{sim*:--start-group -lc -lsyslinux --end-group}\ + %{!sim*:%{g*:-lg}\ + %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p} -lbsp}\ + -lnosys" + +/* Linker startfile options; crt0 flavors. + We need to remove any previous definition (elfos.h). */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{sim*:crt1.o%s}%{!sim*:crt0.o%s}\ + crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define EXTRA_SPECS \ + {"cpp_subtarget", CRIS_CPP_SUBTARGET_SPEC}, \ + {"cc1_subtarget", CRIS_CC1_SUBTARGET_SPEC}, \ + {"asm_subtarget", CRIS_ASM_SUBTARGET_SPEC}, \ + {"link_subtarget", CRIS_LINK_SUBTARGET_SPEC}, \ + CRIS_SUBTARGET_EXTRA_SPECS + +#define CRIS_SUBTARGET_EXTRA_SPECS + + +/* Node: Run-time Target */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("cris"); \ + builtin_define_std ("CRIS"); \ + builtin_define_std ("GNU_CRIS"); \ + builtin_define ("__CRIS_ABI_version=2"); \ + builtin_assert ("cpu=cris"); \ + builtin_assert ("machine=cris"); \ + } \ + while (0) + +/* Previously controlled by target_flags. Note that this is *not* set + for -melinux. */ +#define TARGET_LINUX 0 + +/* For the cris-*-elf subtarget. */ +#define CRIS_SUBTARGET_DEFAULT 0 + +#define CRIS_CPU_BASE 0 +#define CRIS_CPU_ETRAX4 3 /* Just lz added. */ +#define CRIS_CPU_SVINTO 8 /* Added swap, jsrc & Co., 32-bit accesses. */ +#define CRIS_CPU_NG 10 /* Added mul[su]. */ +#define CRIS_CPU_V32 32 /* Major changes. */ + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT CRIS_CPU_BASE +#endif + +/* Default target_flags if no switches specified. + The alignment-by-32 is to make builtin atomic support for v10 and v32 + work for *-elf for types without specified alignment (like plain + "int"). See top comment in sync.md. */ +#ifndef TARGET_DEFAULT +# if TARGET_CPU_DEFAULT == 32 +# define TARGET_DEFAULT \ + (MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_ALIGN_BY_32 \ + + MASK_PROLOGUE_EPILOGUE) +# elif TARGET_CPU_DEFAULT == 10 +# define TARGET_DEFAULT \ + (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_ALIGN_BY_32 \ + + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) +# else /* 0 */ +# define TARGET_DEFAULT \ + (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) +# endif +#endif + +/* Local, providing a default for cris_cpu_version. 
*/ +#define CRIS_DEFAULT_CPU_VERSION TARGET_CPU_DEFAULT + +#define TARGET_HAS_MUL_INSNS (cris_cpu_version >= CRIS_CPU_NG) +#define TARGET_HAS_LZ (cris_cpu_version >= CRIS_CPU_ETRAX4) +#define TARGET_HAS_BREAK (cris_cpu_version >= CRIS_CPU_ETRAX4) +#define TARGET_HAS_SWAP (cris_cpu_version >= CRIS_CPU_SVINTO) +#define TARGET_V32 (cris_cpu_version >= CRIS_CPU_V32) + +/* The "break" instruction was introduced with ETRAX 4. */ +#define TARGET_TRAP_USING_BREAK8 \ + (cris_trap_using_break8 == 2 ? TARGET_HAS_BREAK : cris_trap_using_break8) + +/* Call library functions by default for GNU/Linux. */ +#define TARGET_ATOMICS_MAY_CALL_LIBFUNCS \ + (cris_atomics_calling_libfunc == 2 \ + ? TARGET_LINUX : cris_atomics_calling_libfunc) + +/* The < v10 atomics turn off interrupts, so they don't need alignment. + Incidentally, by default alignment is off there causing variables to + be default unaligned all over, so we'd have to make support + libraries use a proper atomic type (instead of "int"), one we'd + specify as aligned. */ +#define TARGET_TRAP_UNALIGNED_ATOMIC \ + (cris_trap_unaligned_atomic == 2 \ + ? (TARGET_V32 || cris_cpu_version == 10) \ + : cris_trap_unaligned_atomic) + +/* Node: Storage Layout */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +/* WORDS_BIG_ENDIAN is not defined in the hardware, but for consistency, + we use little-endianness, and we may also be able to use + post-increment on DImode indirect. */ +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 + +#define CRIS_PROMOTED_MODE(MODE, UNSIGNEDP, TYPE) \ + (GET_MODE_CLASS (MODE) == MODE_INT && GET_MODE_SIZE (MODE) < 4) \ + ? SImode : MODE + +/* We will be using prototype promotion, so they will be 32 bit. */ +#define PARM_BOUNDARY 32 + +/* Stack boundary is guided by -mstack-align, -mno-stack-align, + -malign. + Old comment: (2.1: still valid in 2.7.2?) + Note that to make this macro affect the alignment of stack + locals, a fix was required, and special precautions when handling + the stack pointer in various other macros (TARGET_ASM_FUNCTION_PROLOGUE + et al) were required. See file "function.c". If you would just define + this macro, it would only affect the builtin alloca and variable + local data (non-ANSI, non-K&R, Gnu C extension). */ +#define STACK_BOUNDARY \ + (TARGET_STACK_ALIGN ? (TARGET_ALIGN_BY_32 ? 32 : 16) : 8) + +#define FUNCTION_BOUNDARY 16 + +/* Do not change BIGGEST_ALIGNMENT (when optimizing), as it will affect + strange places, at least in 2.1. */ +#define BIGGEST_ALIGNMENT 8 + +/* If -m16bit, -m16-bit, -malign or -mdata-align, + align everything to 16 bit. */ +#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ + (TARGET_DATA_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* Note that CONSTANT_ALIGNMENT has the effect of making gcc believe that + ALL references to constant stuff (in code segment, like strings) has + this alignment. That is a rather rushed assumption. Luckily we do not + care about the "alignment" operand to builtin memcpy (only place where + it counts), so it doesn't affect any bad spots. */ +#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ + (TARGET_CONST_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* FIXME: Define LOCAL_ALIGNMENT for word and dword or arrays and + structures (if -mstack-align=), and check that it is good. 
*/ + +#define EMPTY_FIELD_BOUNDARY 8 + +#define STRUCTURE_SIZE_BOUNDARY 8 + +#define STRICT_ALIGNMENT 0 + +/* Remove any previous definition (elfos.h). + ??? If it wasn't for all the other stuff that affects layout of + structures and bit-fields, this could presumably cause incompatibility + with other GNU/Linux ports (i.e. elfos.h users). */ +#undef PCC_BITFIELD_TYPE_MATTERS + +/* This is only used for non-scalars. Strange stuff happens to structs + (FIXME: What?) if we use anything larger than largest actually used + datum size, so lets make it 32. The type "long long" will still work + as usual. We can still have DImode insns, but they will only be used + for scalar data (i.e. long long). */ +#define MAX_FIXED_MODE_SIZE 32 + + +/* Node: Type Layout */ + +/* Note that DOUBLE_TYPE_SIZE is not defined anymore, since the default + value gives a 64-bit double, which is what we now use. */ + +/* For compatibility and historical reasons, a char should be signed. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Note that WCHAR_TYPE_SIZE is used in cexp.y, + where TARGET_SHORT is not available. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +/* Node: Register Basics */ + +/* We count all 16 non-special registers, SRP, a faked argument + pointer register, MOF and CCR/DCCR. */ +#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1) + +/* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a + frame-pointer, but is not fixed. SRP is not included in general + registers and will not be used automatically. All other special + registers are fixed at the moment. The faked argument pointer register + is fixed too. */ +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1} + +/* Register r9 is used for structure-address, r10-r13 for parameters, + r10- for return values. */ +#define CALL_USED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1} + +/* Node: Allocation Order */ + +/* We need this on CRIS, because call-used regs should be used first, + (so we don't need to push). Else start using registers from r0 and up. + This preference is mainly because if we put call-used-regs from r0 + and up, then we can't use movem to push the rest, (which have to be + saved if we use them, and movem has to start with r0). + Change here if you change which registers to use as call registers. + + The actual need to explicitly prefer call-used registers improved the + situation a lot for 2.1, but might not actually be needed anymore. + Still, this order reflects what GCC should find out by itself, so it + probably does not hurt. + + Order of preference: Call-used-regs first, then r0 and up, last fp & + sp & pc as fillers. + Call-used regs in opposite order, so they will cause less conflict if + a function has few args (<= 3) and it wants a scratch reg. + Use struct-return address first, since very few functions use + structure return values so it is likely to be available. */ +#define REG_ALLOC_ORDER \ + {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19} + +/* Use MOF and ACR. Prefer ACR before any other register. Prefer MOF + then SRP after saved registers. The *after* is because they're only + useful for storage, not for things being computed, which is + apparently more common. 
*/ +#define REG_ALLOC_ORDER_V32 \ + {15, 9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 16, 14, 18, 19} + + +/* Node: Values in Registers */ + +/* The VOIDmode test is so we can omit mode on anonymous insns. FIXME: + Still needed in 2.9x, at least for Axis-20000319. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (MODE == VOIDmode \ + ? 1 : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* CRIS permits all registers to hold all modes. Well, except for the + condition-code register. And we can't hold larger-than-register size + modes in the last special register that can hold a full 32 bits. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (((MODE) == CCmode \ + || (REGNO) != CRIS_CC0_REGNUM) \ + && (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + || ((REGNO) != CRIS_MOF_REGNUM && (REGNO) != CRIS_ACR_REGNUM))) + +/* Because CCmode isn't covered by the "narrower mode" statement in + tm.texi, we can still say all modes are tieable despite not having an + always 1 HARD_REGNO_MODE_OK. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + + +/* Node: Leaf Functions */ +/* (no definitions) */ + +/* Node: Stack Registers */ +/* (no definitions) */ + + +/* Node: Register Classes */ + +/* We need a separate register class to handle register allocation for + ACR, since it can't be used for post-increment. + + It's not obvious, but having subunions of all movable-between + register classes does really help register allocation (pre-IRA + comment). */ +enum reg_class + { + NO_REGS, + ACR_REGS, MOF_REGS, SRP_REGS, CC0_REGS, + MOF_SRP_REGS, SPECIAL_REGS, + SPEC_ACR_REGS, GENNONACR_REGS, + SPEC_GENNONACR_REGS, GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ + {"NO_REGS", \ + "ACR_REGS", "MOF_REGS", "SRP_REGS", "CC0_REGS", \ + "MOF_SRP_REGS", "SPECIAL_REGS", \ + "SPEC_ACR_REGS", "GENNONACR_REGS", "SPEC_GENNONACR_REGS", \ + "GENERAL_REGS", "ALL_REGS"} + +#define CRIS_SPECIAL_REGS_CONTENTS \ + ((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM)) + +/* Count in the faked argument register in GENERAL_REGS. Keep out SRP. */ +#define REG_CLASS_CONTENTS \ + { \ + {0}, \ + {1 << CRIS_ACR_REGNUM}, \ + {1 << CRIS_MOF_REGNUM}, \ + {1 << CRIS_SRP_REGNUM}, \ + {1 << CRIS_CC0_REGNUM}, \ + {(1 << CRIS_MOF_REGNUM) \ + | (1 << CRIS_SRP_REGNUM)}, \ + {CRIS_SPECIAL_REGS_CONTENTS}, \ + {CRIS_SPECIAL_REGS_CONTENTS \ + | (1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM)) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS} \ + } + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == CRIS_ACR_REGNUM ? ACR_REGS : \ + (REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \ + (REGNO) == CRIS_SRP_REGNUM ? SRP_REGS : \ + (REGNO) == CRIS_CC0_REGNUM ? CC0_REGS : \ + GENERAL_REGS) + +#define BASE_REG_CLASS GENERAL_REGS + +#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OCODE, ICODE) \ + ((OCODE) != POST_INC ? BASE_REG_CLASS : GENNONACR_REGS) + +#define INDEX_REG_CLASS GENERAL_REGS + +/* Since it uses reg_renumber, it is safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. 
*/ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) <= CRIS_LAST_GENERAL_REGISTER \ + || (REGNO) == ARG_POINTER_REGNUM \ + || (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \ + || (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM) + +/* REGNO_OK_FOR_BASE_P seems to be obsolete wrt. this one, but not yet + documented as such. */ +#define REGNO_MODE_CODE_OK_FOR_BASE_P(REGNO, MODE, AS, OCODE, ICODE) \ + (REGNO_OK_FOR_BASE_P (REGNO) \ + && ((OCODE) != POST_INC \ + || !((REGNO) == CRIS_ACR_REGNUM \ + || (unsigned) reg_renumber[REGNO] == CRIS_ACR_REGNUM))) + +/* See REGNO_OK_FOR_BASE_P. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* We can't move special registers to and from memory in smaller than + word_mode. We also can't move between special registers. Luckily, + -1, as returned by true_regnum for non-sub/registers, is valid as a + parameter to our REGNO_REG_CLASS, returning GENERAL_REGS, so we get + the effect that any X that isn't a special-register is treated as + a non-empty intersection with GENERAL_REGS. */ +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ((reg_class_subset_p (CLASS, SPECIAL_REGS) \ + && ((GET_MODE_SIZE (MODE) < 4 && MEM_P (X)) \ + || !reg_classes_intersect_p (REGNO_REG_CLASS (true_regnum (X)), \ + GENERAL_REGS))) \ + ? GENERAL_REGS : NO_REGS) + +/* FIXME: Fix regrename.c; it should check validity of replacements, + not just with a silly pass-specific macro. We may miss some + opportunities, but we must stop regrename from creating acr++. */ +#define HARD_REGNO_RENAME_OK(FROM, TO) ((TO) != CRIS_ACR_REGNUM) + +/* For CRIS, this is always the size of MODE in words, + since all registers are the same size. To use omitted modes in + patterns with reload constraints, you must say the widest size + which is allowed for VOIDmode. + FIXME: Does that still apply for gcc-2.9x? Keep poisoned until such + patterns are added back. News: 2001-03-16: Happens as early as the + underscore-test. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == VOIDmode \ + ? 1 /* + cris_fatal ("CLASS_MAX_NREGS with VOIDmode") */ \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + + +/* Node: Frame Layout */ + +#define STACK_GROWS_DOWNWARD +#define FRAME_GROWS_DOWNWARD 1 + +/* It seems to be indicated in the code (at least 2.1) that this is + better a constant, and best 0. */ +#define STARTING_FRAME_OFFSET 0 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + cris_return_addr_rtx (COUNT, FRAMEADDR) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, CRIS_SRP_REGNUM) + +/* FIXME: Any __builtin_eh_return callers must not return anything and + there must not be collisions with incoming parameters. Luckily the + number of __builtin_eh_return callers is limited. For now return + parameter registers in reverse order and hope for the best. */ +#define EH_RETURN_DATA_REGNO(N) \ + (IN_RANGE ((N), 0, 3) ? (CRIS_FIRST_ARG_REG + 3 - (N)) : INVALID_REGNUM) + +/* Store the stack adjustment in the structure-return-address register. */ +#define CRIS_STACKADJ_REG CRIS_STRUCT_VALUE_REGNUM +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, CRIS_STACKADJ_REG) + +#define EH_RETURN_HANDLER_RTX \ + cris_return_addr_rtx (0, NULL) + +#define INIT_EXPANDERS cris_init_expanders () + +/* FIXME: Move this to right node (it's not documented properly yet). */ +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (CRIS_SRP_REGNUM) + +/* FIXME: Move this to right node (it's not documented properly yet). 
+ FIXME: Check what alignment we can assume regarding + TARGET_STACK_ALIGN and TARGET_ALIGN_BY_32. */ +#define DWARF_CIE_DATA_ALIGNMENT -1 + +/* If we would ever need an exact mapping between canonical register + number and dwarf frame register, we would either need to include all + registers in the gcc description (with some marked fixed of course), or + an inverse mapping from dwarf register to gcc register. There is one + need in dwarf2out.c:expand_builtin_init_dwarf_reg_sizes. Right now, I + don't see that we need exact correspondence between DWARF *frame* + registers and DBX_REGISTER_NUMBER, so map them onto GCC registers. */ +#define DWARF_FRAME_REGNUM(REG) (REG) + +/* Node: Stack Checking */ +/* (no definitions) FIXME: Check. */ + +/* Node: Frame Registers */ + +#define STACK_POINTER_REGNUM CRIS_SP_REGNUM + +/* Register used for frame pointer. This is also the last of the saved + registers, when a frame pointer is not used. */ +#define FRAME_POINTER_REGNUM CRIS_FP_REGNUM + +/* Faked register, is always eliminated. We need it to eliminate + allocating stack slots for the return address and the frame pointer. */ +#define ARG_POINTER_REGNUM CRIS_AP_REGNUM + +#define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM + + +/* Node: Elimination */ + +#define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = cris_initial_elimination_offset (FROM, TO) + + +/* Node: Stack Arguments */ + +/* Since many parameters take up one register each in any case, + defining TARGET_PROMOTE_PROTOTYPES that always returns true would + seem like a good idea, but measurements indicate that a combination + using PROMOTE_MODE is better. */ + +#define ACCUMULATE_OUTGOING_ARGS 1 + + +/* Node: Register Arguments */ + +/* Contrary to what you'd believe, defining FUNCTION_ARG_CALLEE_COPIES + seems like a (small total) loss, at least for gcc-2.7.2 compiling and + running gcc-2.1 (small win in size, small loss running -- 100.1%), + and similarly for size for products (.1 .. .3% bloat, sometimes win). + Due to the empirical likeliness of making slower code, it is not + defined. */ + +/* This no longer *needs* to be a structure; but keeping it as such should + not hurt (and hacking the ABI is simpler). */ +#define CUMULATIVE_ARGS struct cum_args +struct cum_args {int regs;}; + +/* The regs member is an integer, the number of arguments got into + registers so far. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + ((CUM).regs = 0) + +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= CRIS_FIRST_ARG_REG \ + && (REGNO) < CRIS_FIRST_ARG_REG + (CRIS_MAX_ARGS_IN_REGS)) + + +/* Node: Aggregate Return */ + +#define CRIS_STRUCT_VALUE_REGNUM ((CRIS_FIRST_ARG_REG) - 1) + + +/* Node: Caller Saves */ +/* (no definitions) */ + +/* Node: Function entry */ + +/* See cris.c for TARGET_ASM_FUNCTION_PROLOGUE and + TARGET_ASM_FUNCTION_EPILOGUE. */ + +/* Node: Profiling */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + error ("no FUNCTION_PROFILER for CRIS") + +/* FIXME: Some of the undefined macros might be mandatory. If so, fix + documentation. */ + + +/* Node: Trampolines */ + +#define TRAMPOLINE_SIZE (TARGET_V32 ? 58 : 32) + +/* CRIS wants instructions on word-boundary. */ +#define TRAMPOLINE_ALIGNMENT 16 + +/* Node: Library Calls */ + +/* If you change this, you have to check whatever libraries and systems + that use it. 
*/ +#define TARGET_EDOM 33 + + +/* Node: Addressing Modes */ + +#define HAVE_POST_INCREMENT 1 + +#define CONSTANT_ADDRESS_P(X) \ + (CONSTANT_P (X) && cris_legitimate_address_p (QImode, X, false)) + +/* Must be a compile-time constant, so we go with the highest value + among all CRIS variants. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Fix reloads known to cause suboptimal spilling. */ +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, INDL, WIN) \ + do \ + { \ + if (cris_reload_address_legitimized (X, MODE, OPNUM, TYPE, INDL)) \ + goto WIN; \ + } \ + while (0) + + +/* Node: Condition Code */ + +#define NOTICE_UPDATE_CC(EXP, INSN) cris_notice_update_cc (EXP, INSN) + +/* FIXME: Maybe define CANONICALIZE_COMPARISON later, when playing with + optimizations. It is needed; currently we do this with instruction + patterns and NOTICE_UPDATE_CC. */ + + +/* Node: Costs */ + +/* Regardless of the presence of delay slots, the default value of 1 for + BRANCH_COST is the best in the range (1, 2, 3), tested with gcc-2.7.2 + with testcases ipps and gcc, giving smallest and fastest code. */ + +#define SLOW_BYTE_ACCESS 0 + +/* This is the threshold *below* which inline move sequences of + word-length sizes will be emitted. The "9" will translate to + (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions + (8 instruction sequences) or less. */ +#define MOVE_RATIO(speed) 9 + + +/* Node: Sections */ + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +/* The jump table is immediately connected to the preceding insn. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + + +/* Node: PIC */ + +/* Helper type. */ + +enum cris_pic_symbol_type + { + cris_no_symbol = 0, + cris_got_symbol = 1, + cris_rel_symbol = 2, + cris_got_symbol_needing_fixup = 3, + cris_invalid_pic_symbol = 4 + }; + +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? CRIS_GOT_REGNUM : INVALID_REGNUM) + +#define LEGITIMATE_PIC_OPERAND_P(X) cris_legitimate_pic_operand (X) + + +/* Node: File Framework */ + +/* We don't want an .ident for gcc. To avoid that but still support + #ident, we override TARGET_ASM_OUTPUT_IDENT and, since the gcc .ident + is its only use besides front-end .ident directives, we return if + the state if the cgraph is not CGRAPH_STATE_PARSING. */ +#undef TARGET_ASM_OUTPUT_IDENT +#define TARGET_ASM_OUTPUT_IDENT cris_asm_output_ident + +#define ASM_APP_ON "#APP\n" + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Node: Data Output */ + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) (C) == '@' + +/* Node: Uninitialized Data */ + +/* Remember to round off odd values if we want data alignment, + since we cannot do that with an .align directive. + + Using .comm causes the space not to be reserved in .bss, but by + tricks with the symbol type. Not good if other tools than binutils + are used on the object files. Since ".global ... .lcomm ..." works, we + use that. Use .._ALIGNED_COMMON, since gcc whines when we only have + ..._COMMON, and we prefer to whine ourselves; BIGGEST_ALIGNMENT is not + the one to check. */ +/* FIXME: I suspect a bug in gcc with alignment. Do not warn until + investigated; it mucks up the testsuite results. */ +#define CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, LOCAL) \ + do \ + { \ + int align_ = (ALIGN) / BITS_PER_UNIT; \ + if (TARGET_DATA_ALIGN && TARGET_ALIGN_BY_32 && align_ < 4) \ + align_ = 4; \ + else if (TARGET_DATA_ALIGN && align_ < 2) \ + align_ = 2; \ + /* FIXME: Do we need this? 
*/ \ + else if (align_ < 1) \ + align_ = 1; \ + \ + if (LOCAL) \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + } \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u,%u\n", (int)(SIZE), align_); \ + } \ + while (0) + +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 1) + +/* Node: Label Output */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global " + +#define SUPPORTS_WEAK 1 + +#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYM) \ + cris_asm_output_symbol_ref (STREAM, SYM) + +#define ASM_OUTPUT_LABEL_REF(STREAM, BUF) \ + cris_asm_output_label_ref (STREAM, BUF) + +/* Remove any previous definition (elfos.h). */ +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long) NUM) + +/* Node: Initialization */ +/* (no definitions) */ + +/* Node: Macros for Initialization */ +/* (no definitions) */ + +/* Node: Instruction Output */ + +#define REGISTER_NAMES \ + {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \ + "r9", "r10", "r11", "r12", "r13", "sp", "acr", "srp", "mof", "faked_ap", "dccr"} + +#define ADDITIONAL_REGISTER_NAMES \ + {{"r14", 14}, {"r15", 15}, {"pc", 15}} + +/* Output an empty line to illustrate the presence of the delay slot. */ +#define DBR_OUTPUT_SEQEND(FILE) \ + fprintf (FILE, "\n") + +#define LOCAL_LABEL_PREFIX "." + +/* cppinit.c initializes a const array from this, so it must be constant, + can't have it different based on options. Luckily, the prefix is + always allowed, so let's have it on all GCC-generated code. Note that + we have this verbatim everywhere in the back-end, not using %R or %s or + such. */ +#define REGISTER_PREFIX "$" + +/* Remove any previous definition (elfos.h). */ +/* We use -fno-leading-underscore to remove it, when necessary. */ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ + fprintf (FILE, \ + TARGET_V32 \ + ? "\tsubq 4,$sp\n\tmove $%s,[$sp]\n" : "\tpush $%s\n", \ + reg_names[REGNO]) + +#define ASM_OUTPUT_REG_POP(FILE, REGNO) \ + fprintf (FILE, "\tmove [$sp+],$%s\n", reg_names[REGNO]) + + +/* Node: Dispatch Tables */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_V32) \ + asm_fprintf (FILE, "\t.word %LL%d-.\n", VALUE); \ + else \ + asm_fprintf (FILE, "\t.word %LL%d-%LL%d\n", VALUE, REL); \ + } \ + while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + asm_fprintf (FILE, "\t.dword %LL%d\n", VALUE) + +/* Defined to also emit an .align in elfos.h. We don't want that. */ +#undef ASM_OUTPUT_CASE_LABEL + +/* Since the "bound" insn loads the comparison value if the compared< + value (register) is out of bounds (0..comparison value-1), we need + to output another case to catch it. + The way to find it is to look for the label_ref at the else-arm inside + the expanded casesi core-insn. + FIXME: Check this construct when changing to new version of gcc. 
*/ +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ + cris_asm_output_case_end (STREAM, NUM, TABLE) + + +/* Node: Exception Region Output */ +/* (no definitions) */ +/* FIXME: Fill in with our own optimized layout. */ + +/* Node: Alignment Output */ + +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ + fprintf (FILE, "\t.align %d\n", (LOG)) + + +/* Node: All Debuggers */ + +#define DBX_REGISTER_NUMBER(REGNO) \ + ((REGNO) == CRIS_SRP_REGNUM ? CRIS_CANONICAL_SRP_REGNUM : \ + (REGNO) == CRIS_MOF_REGNUM ? CRIS_CANONICAL_MOF_REGNUM : \ + (REGNO) == CRIS_CC0_REGNUM ? CRIS_CANONICAL_CC0_REGNUM : \ + (REGNO)) + +/* FIXME: Investigate DEBUGGER_AUTO_OFFSET, DEBUGGER_ARG_OFFSET. */ + + +/* Node: DBX Options */ + +/* Is this correct? Check later. */ +#define DBX_NO_XREFS + +#define DBX_CONTIN_LENGTH 0 + +/* FIXME: Is this needed when we have 0 DBX_CONTIN_LENGTH? */ +#define DBX_CONTIN_CHAR '?' + + +/* Node: DBX Hooks */ +/* (no definitions) */ + +/* Node: File names and DBX */ +/* (no definitions) */ + + +/* Node: SDB and DWARF */ +/* (no definitions) */ + +/* Node: Misc */ + +/* A combination of the bound (umin) insn together with a + sign-extended add via the table to PC seems optimal. + If the table overflows, the assembler will take care of it. + Theoretically, in extreme cases (uncertain if they occur), an error + will be emitted, so FIXME: Check how large case-tables are emitted, + possible add an option to emit SImode case-tables. */ +#define CASE_VECTOR_MODE HImode + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* FIXME: Investigate CASE_VECTOR_SHORTEN_MODE to make sure HImode is not + used when broken-.word could possibly fail (plus testcase). */ + +/* This is the number of bytes that can be moved in one + reasonably fast instruction sequence. For CRIS, this is two + instructions: mem => reg, reg => mem. */ +#define MOVE_MAX 4 + +/* Maybe SHIFT_COUNT_TRUNCATED is safe to define? FIXME: Check later. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +#define NO_IMPLICIT_EXTERN_C + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/cris.md b/gcc-4.9/gcc/config/cris/cris.md new file mode 100644 index 000000000..47f64512a --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.md @@ -0,0 +1,5157 @@ +;; GCC machine description for CRIS cpu cores. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Axis Communications. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The original PO technology requires these to be ordered by speed, +;; so that assigner will pick the fastest. + +;; See files "md.texi" and "rtl.def" for documentation on define_insn, +;; match_*, et. al. 
+;; +;; The function cris_notice_update_cc in cris.c handles condition code +;; updates for most instructions, helped by the "cc" attribute. + +;; There are several instructions that are orthogonal in size, and seems +;; they could be matched by a single pattern without a specified size +;; for the operand that is orthogonal. However, this did not work on +;; gcc-2.7.2 (and probably not on gcc-2.8.1), relating to that when a +;; constant is substituted into an operand, the actual mode must be +;; deduced from the pattern. There is reasonable hope that that has been +;; fixed, so FIXME: try again. + +;; You will notice that three-operand alternatives ("=r", "r", "!To") +;; are marked with a "!" constraint modifier to avoid being reloaded +;; into. This is because gcc would otherwise prefer to use the constant +;; pool and its offsettable address instead of reloading to an +;; ("=r", "0", "i") alternative. Also, the constant-pool support was not +;; only suboptimal but also buggy in 2.7.2, ??? maybe only in 2.6.3. + +;; All insns that look like (set (...) (plus (...) (reg:SI 8))) +;; get problems when reloading r8 (frame pointer) to r14 + offs (stack +;; pointer). Thus the instructions that get into trouble have specific +;; checks against matching frame_pointer_rtx. +;; ??? But it should be re-checked for gcc > 2.7.2 +;; FIXME: This changed some time ago (from 2000-03-16) for gcc-2.9x. + +;; FIXME: When PIC, all [rX=rY+S] could be enabled to match +;; [rX=gotless_symbol]. +;; The movsi for a gotless symbol could be split (post reload). + + +(define_c_enum "" + [ + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the GOT. + CRIS_UNSPEC_PLT_GOTREL + + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the PC. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + CRIS_UNSPEC_PLT_PCREL + + ;; The address of the global offset table as a source operand. + CRIS_UNSPEC_GOT + + ;; The offset from the global offset table to the operand. + CRIS_UNSPEC_GOTREL + + ;; The PC-relative offset to the operand. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + CRIS_UNSPEC_PCREL + + ;; The index into the global offset table of a symbol, while + ;; also generating a GOT entry for the symbol. + CRIS_UNSPEC_GOTREAD + + ;; Similar to CRIS_UNSPEC_GOTREAD, but also generating a PLT entry. + CRIS_UNSPEC_PLTGOTREAD + + ;; Condition for v32 casesi jump, since it needs to have if_then_else + ;; form with register as one branch and default label as other. + ;; Operand 0 is const_int 0. + CRIS_UNSPEC_CASESI + + ;; Stack frame deallocation barrier. + CRIS_UNSPEC_FRAME_DEALLOC + + ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1... + CRIS_UNSPEC_SWAP_BITS + ]) + +;; Register numbers. +(define_constants + [(CRIS_GOT_REGNUM 0) + (CRIS_STATIC_CHAIN_REGNUM 7) + (CRIS_FP_REGNUM 8) + (CRIS_SP_REGNUM 14) + (CRIS_ACR_REGNUM 15) + (CRIS_SRP_REGNUM 16) + (CRIS_MOF_REGNUM 17) + (CRIS_AP_REGNUM 18) + (CRIS_CC0_REGNUM 19)] +) + +;; We need an attribute to define whether an instruction can be put in +;; a branch-delay slot or not, and whether it has a delay slot. +;; +;; Branches and return instructions have a delay slot, and cannot +;; themselves be put in a delay slot. 
This has changed *for short +;; branches only* between architecture variants, but the possible win +;; is presumed negligible compared to the added complexity of the machine +;; description: one would have to add always-correct infrastructure to +;; distinguish short branches. +;; +;; Whether an instruction can be put in a delay slot depends on the +;; instruction (all short instructions except jumps and branches) +;; and the addressing mode (must not be prefixed or referring to pc). +;; In short, any "slottable" instruction must be 16 bit and not refer +;; to pc, or alter it. +;; +;; The possible values are "yes", "no", "has_slot", "has_return_slot" +;; and "has_call_slot". +;; Yes/no tells whether the insn is slottable or not. Has_call_slot means +;; that the insn is a call insn, which for CRIS v32 has a delay-slot. +;; Of special concern is that no RTX_FRAME_RELATED insn must go in that +;; call delay slot, as it's located in the address *after* the call insn, +;; and the unwind machinery doesn't know about delay slots. +;; Has_slot means that the insn is a branch insn (which are +;; not considered slottable since that is generally true). Having the +;; seemingly illogical value "has_slot" means we do not have to add +;; another attribute just to say that an insn has a delay-slot, since it +;; also infers that it is not slottable. Better names for the attribute +;; were found to be longer and not add readability to the machine +;; description. +;; Has_return_slot is similar, for the return insn. +;; +;; The default that is defined here for this attribute is "no", not +;; slottable, not having a delay-slot, so there's no need to worry about +;; it being wrong for non-branch and return instructions. +;; The default could depend on the kind of insn and the addressing +;; mode, but that would need more attributes and hairier, more error +;; prone code. +;; +;; There is an extra memory constraint, 'Q', which recognizes an indirect +;; register. The constraints 'Q' and '>' together match all possible +;; memory operands that are slottable. +;; For other operands, you need to check if it has a valid "slottable" +;; quick-immediate operand, where the particular signedness-variation +;; may match the constraints 'I' or 'J'.), and include it in the +;; constraint pattern for the slottable pattern. An alternative using +;; only "r" constraints is most often slottable. + +(define_attr "slottable" "no,yes,has_slot,has_return_slot,has_call_slot" + (const_string "no")) + +;; We also need attributes to sanely determine the condition code +;; state. See cris_notice_update_cc for how this is used. + +(define_attr "cc" "none,clobber,normal,noov32,rev" (const_string "normal")) + +;; At the moment, this attribute is just used to help bb-reorder do its +;; work; the default 0 doesn't help it. Many insns have other lengths, +;; though none are shorter. +(define_attr "length" "" (const_int 2)) + +;; A branch has one delay-slot. The instruction in the +;; delay-slot is always executed, independent of whether the branch is +;; taken or not. Note that besides setting "slottable" to "has_slot", +;; there also has to be a "%#" at the end of a "delayed" instruction +;; output pattern (for "jump" this means "ba %l0%#"), so print_operand can +;; catch it and print a "nop" if necessary. This method was stolen from +;; sparc.md. 
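+
+;; As a concrete illustration (the label and the slot insn are arbitrary),
+;; a "ba %l0%#" output template can end up either as
+;;   ba .L2
+;;   nop              ; %# printed a nop: no slottable insn was available
+;; or, with the scheduler filling the slot,
+;;   ba .L2
+;;   moveq 1,$r9      ; executed whether or not the branch is taken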
+ +(define_delay (eq_attr "slottable" "has_slot") + [(eq_attr "slottable" "yes") (nil) (nil)]) + +;; We can't put prologue insns in call-insn delay-slots when +;; DWARF2 unwind info is emitted, because the unwinder matches the +;; address after the insn. It must see the return address of a call at +;; a position at least *one byte after* the insn, or it'll think that +;; the insn hasn't been executed. If the insn is in a delay-slot of a +;; call, it's just *exactly* after the insn. + +(define_delay (eq_attr "slottable" "has_call_slot") + [(and (eq_attr "slottable" "yes") + (ior (not (match_test "RTX_FRAME_RELATED_P (insn)")) + (not (match_test "flag_exceptions")))) + (nil) (nil)]) + +;; The insn in the return insn slot must not be the +;; return-address-register restore. FIXME: Use has_slot and express +;; as a parallel with a use of the return-address-register (currently +;; only SRP). However, this requires an amount of fixing tests for +;; naked RETURN in middle-end. +(define_delay (eq_attr "slottable" "has_return_slot") + [(and (eq_attr "slottable" "yes") + (not (match_test "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)"))) + (nil) (nil)]) + + +;; Iterator definitions. + +;; For the "usual" pattern size alternatives. +(define_mode_iterator BWD [SI HI QI]) +(define_mode_iterator WD [SI HI]) +(define_mode_iterator BW [HI QI]) +(define_mode_attr S [(SI "HI") (HI "QI")]) +(define_mode_attr s [(SI "hi") (HI "qi")]) +(define_mode_attr m [(SI ".d") (HI ".w") (QI ".b")]) +(define_mode_attr mm [(SI ".w") (HI ".b")]) +(define_mode_attr nbitsm1 [(SI "31") (HI "15") (QI "7")]) + +;; For the sign_extend+zero_extend variants. +(define_code_iterator szext [sign_extend zero_extend]) +(define_code_attr u [(sign_extend "") (zero_extend "u")]) +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; For the shift variants. +(define_code_iterator shift [ashiftrt lshiftrt ashift]) +(define_code_iterator shiftrt [ashiftrt lshiftrt]) +(define_code_attr shlr [(ashiftrt "ashr") (lshiftrt "lshr") (ashift "ashl")]) +(define_code_attr slr [(ashiftrt "asr") (lshiftrt "lsr") (ashift "lsl")]) + +(define_code_iterator ncond [eq ne gtu ltu geu leu]) +(define_code_iterator ocond [gt le]) +(define_code_iterator rcond [lt ge]) +(define_code_attr CC [(eq "eq") (ne "ne") (gt "gt") (gtu "hi") (lt "lt") + (ltu "lo") (ge "ge") (geu "hs") (le "le") (leu "ls")]) +(define_code_attr rCC [(eq "ne") (ne "eq") (gt "le") (gtu "ls") (lt "ge") + (ltu "hs") (ge "lt") (geu "lo") (le "gt") (leu "hi")]) +(define_code_attr oCC [(lt "mi") (ge "pl")]) +(define_code_attr roCC [(lt "pl") (ge "mi")]) + +;; Operand and operator predicates. + +(include "predicates.md") +(include "constraints.md") + +;; Test insns. + +;; No test insns with side-effect on the mem addressing. +;; +;; See note on cmp-insns with side-effects (or lack of them) + +;; Normal named test patterns from SI on. 
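+
+;; Note (illustration only): the BW/BWD mode iterators defined above let a
+;; single pattern stand for several modes.  A template written with
+;; match_operand:BW expands into a QImode and an HImode variant, and the
+;; mode attributes (<m>, <s>, <nbitsm1>, ...) substitute the matching
+;; ".b"/".w"/".d" suffixes and sizes into names and output strings, so the
+;; test and compare patterns below cover all integer sizes with only a
+;; handful of definitions.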
+ +(define_insn "*tstsi" + [(set (cc0) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "" +{ + if (which_alternative == 0 && TARGET_V32) + return "cmpq 0,%0"; + return "test.d %0"; +} + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*tst_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "cris_cc0_user_requires_cmp (insn)" + "@ + cmp 0,%0 + test %0 + test %0" + [(set_attr "slottable" "no,yes,no")]) + +(define_insn "*tst_non_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "!cris_cc0_user_requires_cmp (insn)" + "@ + move %0,%0 + test %0 + test %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32,*,*")]) + +;; It seems that the position of the sign-bit and the fact that 0.0 is +;; all 0-bits would make "tstsf" a straight-forward implementation; +;; either "test.d" it for positive/negative or "btstq 30,r" it for +;; zeroness. +;; +;; FIXME: Do that some time; check next_cc0_user to determine if +;; zero or negative is tested for. + +;; Compare insns. + +;; We could optimize the sizes of the immediate operands for various +;; cases, but that is not worth it because of the very little usage of +;; DImode for anything else but a structure/block-mode. Just do the +;; obvious stuff for the straight-forward constraint letters. + +(define_insn "*cmpdi_non_v32" + [(set (cc0) + (compare (match_operand:DI 0 "nonimmediate_operand" "rm,r,r,r,r,r,r,o") + (match_operand:DI 1 "general_operand" "M,Kc,I,P,n,r,o,r")))] + "!TARGET_V32" + "@ + test.d %M0\;ax\;test.d %H0 + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M0,%M1\;ax\;cmp.d %H0,%H1") + +(define_insn "*cmpdi_v32" + [(set (cc0) + (compare (match_operand:DI 0 "register_operand" "r,r,r,r,r") + (match_operand:DI 1 "nonmemory_operand" "Kc,I,P,n,r")))] + "TARGET_V32" + "@ + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0") + +;; Note that compare insns with side effect addressing mode (e.g.): +;; +;; cmp.S [rx=ry+i],rz; +;; cmp.S [%3=%1+%2],%0 +;; +;; are *not* usable for gcc since the reloader *does not accept* +;; cc0-changing insns with side-effects other than setting the condition +;; codes. The reason is that the reload stage *may* cause another insn to +;; be output after the main instruction, in turn invalidating cc0 for the +;; insn using the test. (This does not apply to the CRIS case, since a +;; reload for output -- move to memory -- does not change the condition +;; code. Unfortunately we have no way to describe that at the moment. I +;; think code would improve being in the order of one percent faster. + +;; We have cmps and cmpu (compare reg w. sign/zero extended mem). +;; These are mostly useful for compares in SImode, using 8 or 16-bit +;; constants, but sometimes gcc will find its way to use it for other +;; (memory) operands. Avoid side-effect patterns, though (see above). 
+ +(define_insn "*cmp_ext" + [(set (cc0) + (compare + (match_operand:SI 0 "register_operand" "r,r") + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 1 "memory_operand" "Q>,m")])))] + "" + "cmp%e2 %1,%0" + [(set_attr "slottable" "yes,no")]) + +;; Swap operands; it seems the canonical look (if any) is not enforced. +;; +;; FIXME: Investigate that. + +(define_insn "*cmp_swapext" + [(set (cc0) + (compare + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 0 "memory_operand" "Q>,m")]) + (match_operand:SI 1 "register_operand" "r,r")))] + "" + "cmp%e2 %0,%1" + [(set_attr "slottable" "yes,no") + (set_attr "cc" "rev")]) + +;; The "normal" compare patterns, from SI on. Special-cases with zero +;; are covered above. + +(define_insn "*cmpsi" + [(set (cc0) + (compare + (match_operand:SI 0 "nonimmediate_operand" "r,r,r, Q>,r,r,m") + (match_operand:SI 1 "general_operand" "I,r,Q>,r, P,g,r")))] + "" + "@ + cmpq %1,%0 + cmp.d %1,%0 + cmp.d %1,%0 + cmp.d %0,%1 + cmp%e1.%z1 %1,%0 + cmp.d %1,%0 + cmp.d %0,%1" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,rev,normal,normal,rev")]) + +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,r, Q>,r,m") + (match_operand:BW 1 "general_operand" "r,Q>,r, g,r")))] + "" + "@ + cmp %1,%0 + cmp %1,%0 + cmp %0,%1 + cmp %1,%0 + cmp %0,%1" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,rev,normal,rev")]) + +;; Pattern matching the BTST insn. +;; It is useful for "if (i & val)" constructs, where val is an exact +;; power of 2, or if val + 1 is a power of two, where we check for a bunch +;; of zeros starting at bit 0). + +;; SImode. This mode is the only one needed, since gcc automatically +;; extends subregs for lower-size modes. FIXME: Add testcase. +(define_insn "*btst" + [(set (cc0) + (compare + (zero_extract:SI + (match_operand:SI 0 "nonmemory_operand" "r, r,r, r,r, r,Kp") + (match_operand:SI 1 "const_int_operand" "Kc,n,Kc,n,Kc,n,n") + (match_operand:SI 2 "nonmemory_operand" "M, M,Kc,n,r, r,r")) + (const_int 0)))] + ;; Either it is a single bit, or consecutive ones starting at 0. + ;; The btst ones depend on stuff in NOTICE_UPDATE_CC. + "CONST_INT_P (operands[1]) + && (operands[1] == const1_rtx || operands[2] == const0_rtx) + && (REG_S_P (operands[0]) + || (operands[1] == const1_rtx + && REG_S_P (operands[2]) + && CONST_INT_P (operands[0]) + && exact_log2 (INTVAL (operands[0])) >= 0)) + && !TARGET_CCINIT" + +;; The next-to-last "&&" condition above should be caught by some kind of +;; canonicalization in gcc, but we can easily help with it here. +;; It results from expressions of the type +;; "power_of_2_value & (1 << y)". +;; +;; Since there may be codes with tests in on bits (in constant position) +;; beyond the size of a word, handle that by assuming those bits are 0. +;; GCC should handle that, but it's a matter of easily-added belts while +;; having suspenders. + + "@ + btstq (%1-1),%0 + cmpq 0,%0 + btstq %2,%0 + clearf nz + btst %2,%0 + clearf nz + cmpq %p0,%2" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Move insns. + +;; The whole mandatory movdi family is here; expander, "anonymous" +;; recognizer and splitter. We're forced to have a movdi pattern, +;; although GCC should be able to split it up itself. Normally it can, +;; but if other insns have DI operands (as is the case here), reload +;; must be able to generate or match a movdi. many testcases fail at +;; -O3 or -fssa if we don't have this. 
FIXME: Fix GCC... See +;; . +;; However, a patch from Richard Kenner (similar to the cause of +;; discussion at the URL above), indicates otherwise. See +;; . +;; The truth has IMO is not been decided yet, so check from time to +;; time by disabling the movdi patterns. + +;; To appease testcase gcc.c-torture/execute/920501-2.c (and others) at +;; -O0, we need a movdi as a temporary measure. Here's how things fail: +;; A cmpdi RTX needs reloading (global): +;; (insn 185 326 186 (set (cc0) +;; (compare (mem/f:DI (reg/v:SI 22) 0) +;; (const_int 1 [0x1]))) 4 {cmpdi} (nil) +;; (nil)) +;; Now, reg 22 is reloaded for input address, and the mem is also moved +;; out of the instruction (into a register), since one of the operands +;; must be a register. Reg 22 is reloaded (into reg 10), and the mem is +;; moved out and synthesized in SImode parts (reg 9, reg 10 - should be ok +;; wrt. overlap). The bad things happen with the synthesis in +;; emit_move_insn_1; the location where to substitute reg 10 is lost into +;; two new RTX:es, both still having reg 22. Later on, the left-over reg +;; 22 is recognized to have an equivalent in memory which is substituted +;; straight in, and we end up with an unrecognizable insn: +;; (insn 325 324 326 (set (reg:SI 9 r9) +;; (mem/f:SI (mem:SI (plus:SI (reg:SI 8 r8) +;; (const_int -84 [0xffffffac])) 0) 0)) -1 (nil) +;; (nil)) +;; which is the first part of the reloaded synthesized "movdi". +;; The right thing would be to add equivalent replacement locations for +;; insn with pseudos that need more reloading. The question is where. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && operands[1] != const0_rtx + && (!TARGET_V32 || (!REG_P (operands[1]) && can_create_pseudo_p ()))) + operands[1] = copy_to_mode_reg (DImode, operands[1]); + + /* Some other ports (as of 2001-09-10 for example mcore and romp) also + prefer to split up constants early, like this. The testcase in + gcc.c-torture/execute/961213-1.c shows that CSE2 gets confused by the + resulting subreg sets when using the construct from mcore (as of FSF + CVS, version -r 1.5), and it believes that the high part (the last one + emitted) is the final value. */ + if ((CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_DOUBLE) + && ! reload_completed + && ! reload_in_progress) + { + rtx insns; + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + start_sequence (); + emit_move_insn (operand_subword (op0, 0, 1, DImode), + operand_subword (op1, 0, 1, DImode)); + emit_move_insn (operand_subword (op0, 1, 1, DImode), + operand_subword (op1, 1, 1, DImode)); + insns = get_insns (); + end_sequence (); + + emit_insn (insns); + DONE; + } +}) + +(define_insn_and_split "*movdi_insn_non_v32" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rx,m") + (match_operand:DI 1 "general_operand" "rx,g,rxM"))] + "(register_operand (operands[0], DImode) + || register_operand (operands[1], DImode) + || operands[1] == const0_rtx) + && !TARGET_V32" + "#" + "&& reload_completed" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Overlapping (but non-identical) source memory address and destination +;; register would be a compiler bug, so we don't have to specify that. 
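+
+;; Illustration (registers arbitrary): after reload, the splitter above
+;; turns a register-to-register DImode move into its two SImode halves,
+;; roughly
+;;   move.d $r10,$r12
+;;   move.d $r11,$r13
+;; with cris_split_movdx choosing an order that avoids clobbering a half
+;; that is still needed.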
+(define_insn "*movdi_v32" + [(set + (match_operand:DI 0 "nonimmediate_operand" "=r,rx,&r,>, m,r,x,m") + (match_operand:DI 1 "general_operand" "rxi,r>,m, rx,r,m,m,x"))] + "TARGET_V32" +{ + switch (which_alternative) + { + /* FIXME: 1) Use autoincrement where possible. 2) Have peephole2, + particularly for cases where the address register is dead. */ + case 5: + if (REGNO (operands[0]) == REGNO (XEXP (operands[1], 0))) + return "addq 4,%L1\;move.d %1,%H0\;subq 4,%L1\;move.d %1,%M0"; + gcc_assert (REGNO (operands[0]) + 1 == REGNO (XEXP (operands[1], 0))); + return "move.d [%L1+],%M0\;move.d [%L1],%H0"; + case 2: + /* We could do away with the addq if we knew the address-register + isn't ACR. If we knew the address-register is dead, we could do + away with the subq too. */ + return "move.d [%L1],%M0\;addq 4,%L1\;move.d [%L1],%H0\;subq 4,%L1"; + case 4: + return "move.d %M1,[%L0]\;addq 4,%L0\;move.d %H1,[%L0]\;subq 4,%L0"; + case 6: + return "move [%L1],%M0\;addq 4,%L1\;move [%L1],%H0\;subq 4,%L1"; + case 7: + return "move %M1,[%L0]\;addq 4,%L0\;move %H1,[%L0]\;subq 4,%L0"; + + default: + return "#"; + } +} + ;; The non-split cases clobber cc0 because of their adds and subs. + ;; Beware that NOTICE_UPDATE_CC is called before the forced split happens. + [(set_attr "cc" "*,*,clobber,*,clobber,clobber,*,*")]) + +;; Much like "*movdi_insn_non_v32". Overlapping registers and constants +;; is handled so much better in cris_split_movdx. +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_V32 + && reload_completed + && (!MEM_P (operands[0]) || !REG_P (XEXP (operands[0], 0))) + && (!MEM_P (operands[1]) || !REG_P (XEXP (operands[1], 0)))" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Side-effect patterns for move.S1 [rx=ry+rx.S2],rw +;; and move.S1 [rx=ry+i],rz +;; Then movs.S1 and movu.S1 for both modes. +;; +;; move.S1 [rx=ry+rz.S],rw avoiding when rx is ry, or rw is rx +;; FIXME: These could have anonymous mode for operand 0. +;; FIXME: Special registers' alternatives too. + +(define_insn "*mov_side_biap" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move [%4=%3+%1%T2],%0") + +(define_insn "*mov_sidesisf_biap" + [(set (match_operand 0 "register_operand" "=r,r,x,x") + (mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r,r,r") + (match_operand:SI 2 "const_int_operand" "n,n,n,n")) + (match_operand:SI 3 "register_operand" "r,r,r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r,*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move.%s0 [%4=%3+%1%T2],%0 + # + move [%4=%3+%1%T2],%0") + +;; move.S1 [rx=ry+i],rz +;; avoiding move.S1 [ry=ry+i],rz +;; and move.S1 [rz=ry+i],rz +;; Note that "i" is allowed to be a register. 
+ +(define_insn "*mov_side" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (mem:BW + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +(define_insn "*mov_sidesisf" + [(set (match_operand 0 "register_operand" "=r,r,r,x,x,x,r,r,x,x") + (mem + (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,r,r,r,R,R,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r>Rn,r,>Rn,r,r,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*1,r,r,*2,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 + || which_alternative == 3 + || which_alternative == 6 + || which_alternative == 8) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative < 3) + return "move.%s0 [%3=%1%S2],%0"; + if (which_alternative == 7) + return "move.%s0 [%3=%2%S1],%0"; + if (which_alternative == 9) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +;; Other way around; move to memory. + +;; Note that the condition (which for side-effect patterns is usually a +;; call to cris_side_effect_mode_ok), isn't consulted for register +;; allocation preferences -- constraints is the method for that. The +;; drawback is that we can't exclude register allocation to cause +;; "move.s rw,[rx=ry+rz.S]" when rw==rx without also excluding rx==ry or +;; rx==rz if we use an earlyclobber modifier for the constraint for rx. +;; Instead of that, we recognize and split the cases where dangerous +;; register combinations are spotted: where a register is set in the +;; side-effect, and used in the main insn. We don't handle the case where +;; the set in the main insn overlaps the set in the side-effect; that case +;; must be handled in gcc. We handle just the case where the set in the +;; side-effect overlaps the input operand of the main insn (i.e. just +;; moves to memory). 
+ +;; +;; move.s rz,[ry=rx+rw.S] + +(define_insn "*mov_side_biap_mem" + [(set (mem:BW (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r"))) + (match_operand:BW 3 "register_operand" "r,r,r")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move %3,[%4=%2+%0%T1]") + +(define_insn "*mov_sidesisf_biap_mem" + [(set (mem (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r,r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n,n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r,r,r,r"))) + (match_operand 3 "register_operand" "r,r,r,x,x,x")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r,*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[3])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move.%s3 %3,[%4=%2+%0%T1] + # + # + move %3,[%4=%2+%0%T1]") + +;; Split for the case above where we're out of luck with register +;; allocation (again, the condition isn't checked for that), and we end up +;; with the set in the side-effect getting the same register as the input +;; register. + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "cris_nonsp_register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "reload_completed && reg_overlap_mentioned_p (operands[4], operands[3])" + [(set (match_dup 5) (match_dup 3)) + (set (match_dup 4) (match_dup 2)) + (set (match_dup 4) + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 4)))] + "operands[5] + = replace_equiv_address (operands[6], + gen_rtx_PLUS (SImode, + gen_rtx_MULT (SImode, + operands[0], + operands[1]), + operands[2]));") + +;; move.s rx,[ry=rz+i] +;; FIXME: These could have anonymous mode for operand 2. 
+ +;; QImode + +(define_insn "*mov_side_mem" + [(set (mem:BW + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r,R,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn,r,r,r"))) + (match_operand:BW 2 "register_operand" "r,r,r,r,r,r,r")) + (set (match_operand:SI 3 "register_operand" "=*0,!*2,r,r,*1,!*2,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 1 || which_alternative == 5) + return "#"; + if (which_alternative == 6) + return "move.%s2 %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; SImode + +(define_insn "*mov_sidesisf_mem" + [(set (mem + (plus:SI + (match_operand:SI + 0 "cris_bdap_operand" + "%r, r, r,r, r, r,r, R,R, R,R, R") + (match_operand:SI + 1 "cris_bdap_operand" + "r>Rn,r>Rn,r,>Rn,r>Rn,r,>Rn,r,r, r,r, r"))) + (match_operand 2 "register_operand" + "r, r, r,r, x, x,x, r,r, r,x, x")) + (set (match_operand:SI 3 "register_operand" + "=*0,!2, r,r, *0, r,r, *1,!*2,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "GET_MODE_SIZE (GET_MODE (operands[2])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 1 + || which_alternative == 7 + || which_alternative == 8 + || which_alternative == 10) + return "#"; + if (which_alternative < 4) + return "move.%s2 %2,[%3=%0%S1]"; + if (which_alternative == 9) + return "move.%s2 %2,[%3=%1%S0]"; + if (which_alternative == 11) + return "move %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; Like the biap case, a split where the set in the side-effect gets the +;; same register as the input register to the main insn, since the +;; condition isn't checked at register allocation. + +(define_split + [(parallel + [(set (match_operator + 4 "cris_mem_op" + [(plus:SI + (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "cris_nonsp_register_operand" "") + (plus:SI (match_dup 0) (match_dup 1)))])] + "reload_completed && reg_overlap_mentioned_p (operands[3], operands[2])" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 3) (match_dup 0)) + (set (match_dup 3) (plus:SI (match_dup 3) (match_dup 1)))] + "") + +;; Clear memory side-effect patterns. It is hard to get to the mode if +;; the MEM was anonymous, so there will be one for each mode. 
+ +;; clear.[bwd] [ry=rx+rw.s2] + +(define_insn "*clear_side_biap" + [(set (mem:BWD (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "const_int_operand" "n,n")) + (match_operand:SI 2 "register_operand" "r,r"))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "=*2,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 3, 2, 0, 1, -1)" + "@ + # + clear [%3=%2+%0%T1]") + +;; clear.[bwd] [ry=rz+i] + +(define_insn "*clear_side" + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "=*0,r,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 2, 0, 1, -1, -1)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 4) + return "clear [%2=%1%S0]"; + return "clear [%2=%0%S1]"; +}) + +;; Normal move patterns from SI on. + +(define_expand "movsi" + [(set + (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "cris_general_operand_or_symbol" ""))] + "" +{ + /* If the output goes to a MEM, make sure we have zero or a register as + input. */ + if (MEM_P (operands[0]) + && ! REG_S_P (operands[1]) + && operands[1] != const0_rtx + && can_create_pseudo_p ()) + operands[1] = force_reg (SImode, operands[1]); + + /* If we're generating PIC and have an incoming symbol, validize it to a + general operand or something that will match a special pattern. + + FIXME: Do we *have* to recognize anything that would normally be a + valid symbol? Can we exclude global PIC addresses with an added + offset? */ + if (flag_pic + && CONSTANT_ADDRESS_P (operands[1]) + && !cris_valid_pic_const (operands[1], false)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (operands[1]); + + gcc_assert (t != cris_no_symbol); + + if (! REG_S_P (operands[0])) + { + /* We must have a register as destination for what we're about to + do, and for the patterns we generate. */ + CRIS_ASSERT (can_create_pseudo_p ()); + operands[1] = force_reg (SImode, operands[1]); + } + else + { + /* FIXME: add a REG_EQUAL (or is it REG_EQUIV) note to the + destination register for the symbol. It might not be + worth it. Measure. */ + crtl->uses_pic_offset_table = 1; + if (t == cris_rel_symbol) + { + /* Change a "move.d sym(+offs),rN" into (allocate register rM) + for pre-v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_GOTREL) offs)),rM" "add.d rPIC,rM,rN" + and for v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_PCREL) offs)),rN". */ + rtx tem, rm, rn = operands[0]; + rtx sym = GET_CODE (operands[1]) != CONST + ? operands[1] : get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p ()); + + if (TARGET_V32) + { + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL); + if (offs != 0) + tem = plus_constant (Pmode, tem, offs); + rm = rn; + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + } + else + { + /* We still uses GOT-relative addressing for + pre-v32. 
*/ + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_GOTREL); + if (offs != 0) + tem = plus_constant (Pmode, tem, offs); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + rn, 0, OPTAB_LIB_WIDEN) != rn) + internal_error ("expand_binop failed in movsi gotrel"); + } + DONE; + } + else if (t == cris_got_symbol) + { + /* Change a "move.d sym,rN" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_GOTREAD)),rM" + "add.d rPIC,rM,rO", "move.d [rO],rN" with + the memory access marked as read-only. */ + rtx tem, mem, rm, ro, rn = operands[0]; + gcc_assert (can_create_pseudo_p ()); + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]), + CRIS_UNSPEC_GOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it + aliases other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + + /* We can set the GOT memory read of a non-called symbol + to readonly, but not that of a call symbol, as those + are subject to lazy evaluation and usually have the value + changed from the first call to the second (but + constant thereafter). */ + MEM_READONLY_P (mem) = 1; + emit_move_insn (rn, mem); + DONE; + } + else + { + /* We get here when we have to change something that would + be recognizable if it wasn't PIC. A ``sym'' is ok for + PIC symbols both with and without a GOT entry. And ``sym + + offset'' is ok for local symbols, so the only thing it + could be, is a global symbol with an offset. Check and + abort if not. */ + rtx reg = gen_reg_rtx (Pmode); + rtx sym = get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p () + && t == cris_got_symbol_needing_fixup + && sym != NULL_RTX && offs != 0); + + emit_move_insn (reg, sym); + if (expand_binop (SImode, add_optab, reg, + GEN_INT (offs), operands[0], 0, + OPTAB_LIB_WIDEN) != operands[0]) + internal_error ("expand_binop failed in movsi got+offs"); + DONE; + } + } + } +}) + +(define_insn "*movsi_got_load" + [(set (reg:SI CRIS_GOT_REGNUM) (unspec:SI [(const_int 0)] CRIS_UNSPEC_GOT))] + "flag_pic" +{ + return TARGET_V32 + ? "lapc _GLOBAL_OFFSET_TABLE_,%:" + : "move.d $pc,%:\;sub.d .:GOTOFF,%:"; +} + [(set_attr "cc" "clobber")]) + +(define_insn "*movsi_internal" + [(set + (match_operand:SI 0 "nonimmediate_operand" + "=r,r, r,Q>,r,Q>,g,r,r, r,g,rQ>,x, m,x") + (match_operand:SI 1 "cris_general_operand_or_pic_source" + "r,Q>,M,M, I,r, M,n,!S,g,r,x, rQ>,x,gi"))] + ;; Note that we prefer not to use the S alternative (if for some reason + ;; it competes with others) above, but g matches S. + "" +{ + /* Better to have c-switch here; it is worth it to optimize the size of + move insns. The alternative would be to try to find more constraint + letters. FIXME: Check again. It seems this could shrink a bit. 
*/ + switch (which_alternative) + { + case 9: + if (TARGET_V32) + { + if (!flag_pic + && (GET_CODE (operands[1]) == SYMBOL_REF + || GET_CODE (operands[1]) == LABEL_REF + || GET_CODE (operands[1]) == CONST)) + { + /* FIXME: Express this through (set_attr cc none) instead, + since we can't express the ``none'' at this point. FIXME: + Use lapc for everything except const_int and when next cc0 + user would want the flag setting. */ + CC_STATUS_INIT; + return "lapc %1,%0"; + } + if (flag_pic == 1 + && GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == UNSPEC + && XINT (XEXP (operands[1], 0), 1) == CRIS_UNSPEC_GOTREAD) + return "movu.w %1,%0"; + } + /* FALLTHROUGH */ + case 0: + case 1: + case 5: + case 10: + return "move.d %1,%0"; + + case 11: + case 12: + case 13: + case 14: + return "move %d1,%0"; + + case 2: + case 3: + case 6: + return "clear.d %0"; + + /* Constants -32..31 except 0. */ + case 4: + return "moveq %1,%0"; + + /* We can win a little on constants -32768..-33, 32..65535. */ + case 7: + if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) < 65536) + { + if (INTVAL (operands[1]) < 256) + return "movu.b %1,%0"; + return "movu.w %1,%0"; + } + else if (INTVAL (operands[1]) >= -32768 && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) < 128) + return "movs.b %1,%0"; + return "movs.w %1,%0"; + } + return "move.d %1,%0"; + + case 8: + { + rtx tem = operands[1]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + && (XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "movs.w %1,%0"; + return "move.d %1,%0"; + + case CRIS_UNSPEC_GOTREL: + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + return "move.d %1,%0"; + + case CRIS_UNSPEC_PCREL: + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + return "lapc %1,%0"; + + default: + gcc_unreachable (); + } + } + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,no,yes,yes,no,no") + (set_attr "cc" "*,*,*,*,*,*,*,*,*,*,*,none,none,none,none")]) + +;; Extend operations with side-effect from mem to register, using +;; MOVS/MOVU. These are from mem to register only. +;; +;; [rx=ry+rz.S] +;; +;; QImode to HImode +;; +;; FIXME: Can we omit extend to HImode, since GCC should truncate for +;; HImode by itself? Perhaps use only anonymous modes? 
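+
+;; Illustration (registers arbitrary): the scaled-index variant can e.g.
+;; sign-extend a byte while recording the element address,
+;;   movs.b [$r12=$r10+$r11.b],$r13
+;; i.e. $r13 = (int) p[i] and $r12 = &p[i] in one insn.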
+ +(define_insn "*ext_sideqihi_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 5 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5.%m5 [%4=%3+%1%T2],%0") + +(define_insn "*ext_sidesi_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 5 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5 [%4=%3+%1%T2],%0") + +;; Same but [rx=ry+i] + +;; QImode to HImode + +(define_insn "*ext_sideqihi" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 4 "cris_extend_operator" + [(mem:QI (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "mov%e4.%m4 [%3=%2%S1],%0"; + return "mov%e4.%m4 [%3=%1%S2],%0"; +}) + +(define_insn "*ext_sidesi" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 4 "cris_extend_operator" + [(mem:BW (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "mov%e4 [%3=%2%S1],%0"; + return "mov%e4 [%3=%1%S2],%0"; +}) + +;; FIXME: See movsi. 
+ +(define_insn "movhi" + [(set + (match_operand:HI 0 "nonimmediate_operand" "=r,r, r,Q>,r,Q>,r,r,r,g,g,r,r,x") + (match_operand:HI 1 "general_operand" "r,Q>,M,M, I,r, L,O,n,M,r,g,x,r"))] + "" +{ + switch (which_alternative) + { + case 0: + case 1: + case 5: + case 10: + case 11: + return "move.w %1,%0"; + case 12: + case 13: + return "move %1,%0"; + case 2: + case 3: + case 9: + return "clear.w %0"; + case 4: + return "moveq %1,%0"; + case 6: + case 8: + if (INTVAL (operands[1]) < 256 && INTVAL (operands[1]) >= -128) + { + if (INTVAL (operands[1]) > 0) + return "movu.b %1,%0"; + return "movs.b %1,%0"; + } + return "move.w %1,%0"; + case 7: + return "movEq %b1,%0"; + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,yes,no,no,no,no,yes,yes") + (set_attr "cc" "*,*,none,none,*,none,*,clobber,*,none,none,*,none,none")]) + +(define_insn "movstricthi" + [(set + (strict_low_part + (match_operand:HI 0 "nonimmediate_operand" "+r,r, r,Q>,Q>,g,r,g")) + (match_operand:HI 1 "general_operand" "r,Q>,M,M, r, M,g,r"))] + "" + "@ + move.w %1,%0 + move.w %1,%0 + clear.w %0 + clear.w %0 + move.w %1,%0 + clear.w %0 + move.w %1,%0 + move.w %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +(define_expand "reload_in" + [(set (match_operand:BW 2 "register_operand" "=r") + (match_operand:BW 1 "memory_operand" "m")) + (set (match_operand:BW 0 "register_operand" "=x") + (match_dup 2))] + "" + "") + +(define_expand "reload_out" + [(set (match_operand:BW 2 "register_operand" "=&r") + (match_operand:BW 1 "register_operand" "x")) + (set (match_operand:BW 0 "memory_operand" "=m") + (match_dup 2))] + "" + "") + +(define_insn "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,r,g,g,r,r,r,x") + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, I,M,r,O,g,x,r"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + moveq %1,%0 + clear.b %0 + move.b %1,%0 + moveq %b1,%0 + move.b %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,yes,no,yes,yes") + (set_attr "cc" "*,*,*,*,*,*,*,*,clobber,*,none,none")]) + +(define_insn "movstrictqi" + [(set (strict_low_part + (match_operand:QI 0 "nonimmediate_operand" "+r,Q>,r, r,Q>,g,g,r")) + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, M,r,g"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + clear.b %0 + move.b %1,%0 + move.b %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +;; The valid "quick" bit-patterns are, except for 0.0, denormalized +;; values REALLY close to 0, and some NaN:s (I think; their exponent is +;; all ones); the worthwhile one is "0.0". +;; It will use clear, so we know ALL types of immediate 0 never change cc. + +(define_insn "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,g,g,r,r,x,Q>,m,x, x") + (match_operand:SF 1 "general_operand" "r,r, Q>,G,G, G,r,g,x,r,x, x,Q>,g"))] + "" + "@ + move.d %1,%0 + move.d %1,%0 + move.d %1,%0 + clear.d %0 + clear.d %0 + clear.d %0 + move.d %1,%0 + move.d %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no,yes,yes,yes,no,yes,no")]) + +;; Movem patterns. Primarily for use in function prologue and epilogue. +;; The V32 variants have an ordering matching the expectations of the +;; standard names "load_multiple" and "store_multiple"; pre-v32 movem +;; store R0 in the highest memory location. 
+ +(define_expand "load_multiple" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_V32" +{ + rtx indreg; + + /* Apparently the predicate isn't checked, so we need to do so + manually. Once happened for libstdc++-v3 locale_facets.tcc. */ + if (!MEM_P (operands[1])) + FAIL; + + indreg = XEXP (operands[1], 0); + + if (GET_CODE (indreg) == POST_INC) + indreg = XEXP (indreg, 0); + if (!REG_P (indreg) + || GET_CODE (operands[2]) != CONST_INT + || !REG_P (operands[0]) + || REGNO (operands[0]) != 0 + || INTVAL (operands[2]) > CRIS_SP_REGNUM + || (int) REGNO (indreg) < INTVAL (operands[2])) + FAIL; + gcc_unreachable (); + emit_insn (cris_gen_movem_load (operands[1], operands[2], 0)); + DONE; +}) + +(define_expand "store_multiple" + [(match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_V32" +{ + rtx indreg; + + /* See load_multiple. */ + if (!MEM_P (operands[0])) + FAIL; + + indreg = XEXP (operands[0], 0); + + if (GET_CODE (indreg) == POST_INC) + indreg = XEXP (indreg, 0); + if (!REG_P (indreg) + || GET_CODE (operands[2]) != CONST_INT + || !REG_P (operands[1]) + || REGNO (operands[1]) != 0 + || INTVAL (operands[2]) > CRIS_SP_REGNUM + || (int) REGNO (indreg) < INTVAL (operands[2])) + FAIL; + gcc_unreachable (); + cris_emit_movem_store (operands[0], operands[2], 0, false); + DONE; +}) + +(define_insn "*cris_load_multiple" + [(match_parallel 0 "cris_load_multiple_op" + [(set (match_operand:SI 1 "register_operand" "=r,r") + (match_operand:SI 2 "memory_operand" "Q,m"))])] + "" + "movem %O0,%o0" + [(set_attr "cc" "none") + (set_attr "slottable" "yes,no") + ;; Not true, but setting the length to 0 causes return sequences (ret + ;; movem) to have the cost they had when (return) included the movem + ;; and reduces the performance penalty taken for needing to emit an + ;; epilogue (in turn copied by bb-reorder) instead of return patterns. + ;; FIXME: temporary change until all insn lengths are correctly + ;; described. FIXME: have better target control over bb-reorder. + (set_attr "length" "0")]) + +(define_insn "*cris_store_multiple" + [(match_parallel 0 "cris_store_multiple_op" + [(set (match_operand:SI 2 "memory_operand" "=Q,m") + (match_operand:SI 1 "register_operand" "r,r"))])] + "" + "movem %o0,%O0" + [(set_attr "cc" "none") + (set_attr "slottable" "yes,no")]) + + +;; Sign- and zero-extend insns with standard names. +;; Those for integer source operand are ordered with the widest source +;; type first. + +;; Sign-extend. + +(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:SI 1 "general_operand" "g")))] + "" + "move.d %1,%M0\;smi %H0\;neg.d %H0,%H0") + +(define_insn "extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:BW 1 "general_operand" "g")))] + "" + "movs %1,%M0\;smi %H0\;neg.d %H0,%H0") + +(define_insn "extendsi2" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (sign_extend:SI (match_operand:BW 1 "general_operand" "r,Q>,g")))] + "" + "movs %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; To do a byte->word extension, extend to dword, except that the top half +;; of the register will be clobbered. FIXME: Perhaps this is not needed. 
+ +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (sign_extend:HI (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "" + "movs.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + + +;; Zero-extend. The DImode ones are synthesized by gcc, so we don't +;; specify them here. + +(define_insn "zero_extendsi2" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (zero_extend:SI + (match_operand:BW 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; Same comment as sign-extend QImode to HImode above applies. + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (zero_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; All kinds of arithmetic and logical instructions. +;; +;; First, anonymous patterns to match addressing modes with +;; side-effects. +;; +;; op.S [rx=ry+I],rz; (add, sub, or, and, bound). +;; +;; [rx=ry+rz.S] + +(define_insn "*op_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0") + (mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) + +(define_insn "*op_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0,0,0,0") + (mem:BWD (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5.%s0 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; To match all cases for commutative operations we may have to have the +;; following pattern for add, or & and. I do not know really, but it does +;; not break anything. +;; +;; FIXME: This really ought to be checked. +;; +;; op.S [rx=ry+I],rz; +;; +;; [rx=ry+rz.S] + +(define_insn "*op_swap_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_commutative_orth_op" + [(mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r"))) + (match_operand:BWD 1 "register_operand" "0,0")])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) +;; FIXME: These could have anonymous mode for operand 0. 
+ +;; QImode + +(define_insn "*op_swap_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_commutative_orth_op" + [(mem:BWD + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (match_operand:BWD 1 "register_operand" "0,0,0,0,0")])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; Add operations, standard names. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'A' output modifier as "adds.w" and "addq", +;; respectively. +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*adddi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + addq %2,%M0\;ax\;addq 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;ax\;%A2 %H2,%H0 + add.d %M2,%M0\;ax\;add.d %H2,%H0 + add.d %M2,%M1,%M0\;ax\;add.d %H2,%H1,%H0") + +; It seems no use allowing a memory operand for this one, because we'd +; need a scratch register for incrementing the address. +(define_insn "*adddi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r,n")))] + "TARGET_V32" + "@ + addq %2,%M0\;addc 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0") + +(define_expand "add3" + [(set (match_operand:BWD 0 "register_operand") + (plus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*addsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r, r,r, r") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,0, 0,0,0,0, 0,r, r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,n,!S,g,!To,0")))] + +;; The last constraint is due to that after reload, the '%' is not +;; honored, and canonicalization doesn't care about keeping the same +;; register as in destination. This will happen after insn splitting. +;; gcc <= 2.7.2. FIXME: Check for gcc-2.9x + + "!TARGET_V32" +{ + switch (which_alternative) + { + case 0: + case 1: + return "add.d %2,%0"; + case 2: + return "addq %2,%0"; + case 3: + return "subq %n2,%0"; + case 4: + /* 'Known value', but not in -63..63. + Check if addu/subu may be used. 
*/ + if (INTVAL (operands[2]) > 0) + { + if (INTVAL (operands[2]) < 256) + return "addu.b %2,%0"; + if (INTVAL (operands[2]) < 65536) + return "addu.w %2,%0"; + } + else + { + if (INTVAL (operands[2]) >= -255) + return "subu.b %n2,%0"; + if (INTVAL (operands[2]) >= -65535) + return "subu.w %n2,%0"; + } + return "add.d %2,%0"; + case 5: + { + rtx tem = operands[2]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + /* We don't allow CRIS_UNSPEC_PCREL here; we can't have a + pc-relative operand in an add insn. */ + && XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "adds.w %2,%0"; + return "add.d %2,%0"; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_GOTREL: + return "add.d %2,%0"; + default: + gcc_unreachable (); + } + } + case 6: + return "add%u2 %2,%0"; + case 7: + return "add.d %2,%1,%0"; + case 8: + return "add.d %1,%0"; + default: + return "BOGUS addsi %2+%1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no,yes")]) + +; FIXME: Check what's best: having the three-operand ACR alternative +; before or after the corresponding-operand2 alternative. Check for +; *all* insns. FIXME: constant constraint letter for -128..127. +(define_insn "*addsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,!a,r,r,r,!a") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r, 0,0,0,r") + (match_operand:SI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,L,L, P,n,g,g")))] + "TARGET_V32" + "@ + add.d %2,%0 + addi %2.b,%1,%0 + add.d %2,%0 + addo.d %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + adds.w %2,%0 + addo %2,%1,%0 + addu.w %2,%0 + add.d %2,%0 + add%u2 %2,%0 + addo.%Z2 %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no,no,no,no,no") + (set_attr "cc" "*,none,*,none,*,*,none,*,none,*,*,*,none")]) + +(define_insn "*addhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r, r,r,r,r") + (plus:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + add.w %2,%0 + add.w %2,%0 + addq %2,%0 + subq %n2,%0 + add.w %2,%0 + add.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*addhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r, !a,r,!a, r,r,!a,r,!a") + (plus:HI + (match_operand:HI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r") + (match_operand:HI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,g,g")))] + "TARGET_V32" + "@ + add.w %2,%0 + addi %2.b,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,*,none")]) + +(define_insn "*addqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r, r,r,r,r,r") + (plus:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,0,0,r") + (match_operand:QI 2 "general_operand" "r,Q>,J,N,O,g,!To")))] + "!TARGET_V32" + "@ + add.b %2,%0 + add.b %2,%0 + addq %2,%0 + subq %n2,%0 + subQ -%b2,%0 + add.b %2,%0 + add.b %2,%1,%0" + [(set_attr "slottable" 
"yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,clobber,normal,normal")]) + +(define_insn "*addqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,r,!a") + (plus:QI + (match_operand:QI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,0,r") + (match_operand:QI 2 "general_operand" "r,r, Q>,Q>,J,N,NJ,O,g,g")))] + "TARGET_V32" + "@ + add.b %2,%0 + addi %2.b,%1,%0 + add.b %2,%0 + addo.b %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + subQ -%b2,%0 + add.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,clobber,*,none")]) + +;; Subtract. +;; +;; Note that because of insn canonicalization these will *seldom* but +;; rarely be used with a known constant as an operand. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'D' output modifier as "subs.w" and "subq", +;; respectively. +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand") + (minus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*subdi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0 + sub.d %M2,%M1,%M0\;ax\;sub.d %H2,%H1,%H0") + +(define_insn "*subdi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r")))] + "TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0") + +(define_expand "sub3" + [(set (match_operand:BWD 0 "register_operand") + (minus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*subsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0, 0,0,0,0,0,r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g,!To")))] + "!TARGET_V32" + +;; This does not do the optimal: "addu.w 65535,r0" when %2 is negative. +;; But then again, %2 should not be negative. 
+ + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0 + sub.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no")]) + +(define_insn "*subsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0,0,0,0,0,0") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g")))] + "TARGET_V32" + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no")]) + +(define_insn "*sub3_nonv32" + [(set (match_operand:BW 0 "register_operand" "=r,r, r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0, 0,0,0,r") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0 + sub %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*sub3_v32" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0,0,0,0") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g")))] + "TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal")]) + +;; CRIS has some add/sub-with-sign/zero-extend instructions. +;; Although these perform sign/zero-extension to SImode, they are +;; equally applicable for the HImode case. +;; FIXME: Check; GCC should handle the widening. +;; Note that these must be located after the normal add/sub patterns, +;; so not to get constants into any less specific operands. +;; +;; Extend with add/sub and side-effect. +;; +;; ADDS/SUBS/ADDU/SUBU and BOUND, which needs a check for zero_extend +;; +;; adds/subs/addu/subu bound [rx=ry+rz.S] + +;; QImode to HImode +;; FIXME: GCC should widen. 
+ +(define_insn "*extopqihi_side_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 6 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 7 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7.%m7 [%5=%4+%2%T3],%0") + +(define_insn "*extopsi_side_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 6 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 7 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7 [%5=%4+%2%T3],%0") + + +;; [rx=ry+i] + +;; QImode to HImode + +(define_insn "*extopqihi_side" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 5 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,0,0") + (match_operator:HI + 6 "cris_extend_operator" + [(mem:QI + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5%E6.%m6 [%4=%3%S2],%0"; + return "%x5%E6.%m6 [%4=%2%S3],%0"; +}) + +(define_insn "*extopsi_side" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 5 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,0,0") + (match_operator:SI + 6 "cris_extend_operator" + [(mem:BW + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5%E6 [%4=%3%S2],%0"; + return "%x5%E6 [%4=%2%S3],%0"; +}) + + +;; As with op.S we may have to add special pattern to match commuted +;; operands to adds/addu and bound +;; +;; adds/addu/bound [rx=ry+rz.S] + +;; QImode to HImode +;; 
FIXME: GCC should widen. + +(define_insn "*extopqihi_swap_side_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI + (match_operator:HI + 6 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))]) + (match_operand:HI 1 "register_operand" "0,0"))) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + add%e6.b [%5=%4+%2%T3],%0") + +(define_insn "*extopsi_swap_side_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 7 "cris_plus_or_bound_operator" + [(match_operator:SI + 6 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))]) + (match_operand:SI 1 "register_operand" "0,0")])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x7%E6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] +;; FIXME: GCC should widen. + +;; QImode to HImode + +(define_insn "*extopqihi_swap_side" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (plus:HI + (match_operator:HI + 5 "cris_extend_operator" + [(mem:QI (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]) + (match_operand:HI 1 "register_operand" "0,0,0,0,0"))) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "add%e5.b [%4=%3%S2],%0"; + return "add%e5.b [%4=%2%S3],%0"; +}) + +(define_insn "*extopsi_swap_side" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 6 "cris_plus_or_bound_operator" + [(match_operator:SI + 5 "cris_extend_operator" + [(mem:BW (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]) + (match_operand:SI 1 "register_operand" "0,0,0,0,0")])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[5]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return \"%x6%E5.%m5 [%4=%3%S2],%0\"; + return "%x6%E5 [%4=%2%S3],%0"; +}) + +;; Extend versions (zero/sign) of normal add/sub (no side-effects). + +;; QImode to HImode +;; FIXME: GCC should widen. 
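;; The "UMIN implies ZERO_EXTEND" tests in several of the conditions
;; around here encode what bound computes: an unsigned minimum against
;; a zero-extended narrow operand, so a sign_extend cannot describe it.
;; As a C sketch of the semantics only (the function name is invented):
;;
;;   #include <stdint.h>
;;
;;   /* bound.b: unsigned minimum of rd and a zero-extended byte.  */
;;   static uint32_t bound_b (uint32_t rd, uint8_t src)
;;   {
;;     uint32_t x = src;          /* always zero-extended */
;;     return rd < x ? rd : x;
;;   }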
+ +(define_insn "*extopqihi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,r") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +(define_insn "*extopqihi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; QImode to SImode + +(define_insn "*extopsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 3 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,r") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 + && (GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*extopsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 3 "cris_additive_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes")]) + +;; As with the side-effect patterns, may have to have swapped operands for add. +;; For commutative operands, these are the canonical forms. + +;; QImode to HImode + +(define_insn "*addxqihi_swap_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (plus:HI + (match_operator:HI + 3 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:HI 1 "register_operand" "0,0,0,r")))] + "!TARGET_V32 && operands[1] != frame_pointer_rtx" + "@ + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; A case for v32, to catch the "addo" insn in addition to "adds". We +;; only care to match the canonical form; there should be no other. 
+ +(define_insn "*addsbw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,!a") + (plus:HI + (sign_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m,m")) + (match_operand:HI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds.b %2,%0 + adds.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber,clobber,none")]) + +(define_insn "*addubw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m")) + (match_operand:HI 1 "register_operand" "0,0")))] + "TARGET_V32" + "addu.b %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +(define_insn "*extopsi_swap_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 4 "cris_plus_or_bound_operator" + [(match_operator:SI + 3 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:SI 1 "register_operand" "0,0,0,r")]))] + "!TARGET_V32 + && (GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND) + && operands[1] != frame_pointer_rtx" + "@ + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*adds_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,!a") + (plus:SI + (sign_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m,m")) + (match_operand:SI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds %2,%0 + adds %2,%0 + addo %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "*,*,none")]) + +(define_insn "*addu_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (zero_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m")) + (match_operand:SI 1 "register_operand" "0,0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "addu %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "*bound_v32" + [(set (match_operand:SI 0 "register_operand" "=r") + (umin:SI + (zero_extend:SI + (match_operand:BW 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "bound %2,%0" + [(set_attr "slottable" "yes")]) + +;; This is the special case when we use what corresponds to the +;; instruction above in "casesi". Do *not* change it to use the generic +;; pattern and "REG 15" as pc; I did that and it led to madness and +;; maintenance problems: Instead of (as imagined) recognizing and removing +;; or replacing this pattern with something simpler, other variant +;; patterns were recognized or combined, including some prefix variants +;; where the value in pc is not that of the next instruction (which means +;; this instruction actually *is* special and *should* be marked as such). +;; When switching from the "generic pattern match" approach to this simpler +;; approach, there were insignificant differences in gcc, ipps and +;; product code, somehow due to scratching reload behind the ear or +;; something. Testcase "gcc" looked .01% slower and 4 bytes bigger; +;; product code became .001% smaller but "looked better". The testcase +;; "ipps" was just different at register allocation). +;; +;; Assumptions in the jump optimizer forces us to use IF_THEN_ELSE in this +;; pattern with the default-label as the else, with the "if" being +;; index-is-less-than the max number of cases plus one. The default-label +;; is attached to the end of the case-table at time of output. 
+ +(define_insn "*casesi_adds_w" + [(set (pc) + (if_then_else + (ltu (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n")) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 0) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "!TARGET_V32 && operands[0] != frame_pointer_rtx" + "adds.w [$pc+%0.w],$pc" + [(set_attr "cc" "clobber")]) + +;; For V32, we just have a jump, but we need to mark the table as used, +;; and the jump insn must have the if_then_else form expected by core +;; GCC. Since we don't want to prolong the lifetime of the original +;; index value, we compare against "unspec 0". It's a pity we have to +;; jump through to get the default label in place and to keep the jump +;; table around. FIXME: Look into it some time. + +(define_insn "*casesi_jump_v32" + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) + (match_operand:SI 0 "const_int_operand" "n")) + (match_operand:SI 1 "register_operand" "r") + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "TARGET_V32" + "jump %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_slot")]) + +;; Multiply instructions. + +;; Sometimes powers of 2 (which are normally canonicalized to a +;; left-shift) appear here, as a result of address reloading. +;; As a special, for values 3 and 5, we can match with an addi, so add those. +;; +;; FIXME: This may be unnecessary now. +;; Explicitly named for convenience of having a gen_... function. + +(define_insn "addi_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI + (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "const_int_operand" "n")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 4 || INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5)" +{ + if (INTVAL (operands[2]) == 2) + return "lslq 1,%0"; + else if (INTVAL (operands[2]) == 4) + return "lslq 2,%0"; + else if (INTVAL (operands[2]) == 3) + return "addi %0.w,%0"; + else if (INTVAL (operands[2]) == 5) + return "addi %0.d,%0"; + return "BAD: adr_mulsi: %0=%1*%2"; +} +[(set_attr "slottable" "yes") + ;; No flags are changed if this insn is "addi", but it does not seem + ;; worth the trouble to distinguish that to the lslq cases. + (set_attr "cc" "clobber")]) + +;; The addi insn as it is normally used. + +;; Make the ACR alternative taste bad enough to not choose it as a +;; preference to avoid spilling problems (unwind-dw2-fde.c at build). +;; FIXME: Revisit for new register allocator. + +(define_insn "*addi" + [(set (match_operand:SI 0 "register_operand" "=r,!a") + (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 1 "register_operand" "0,r")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[3]) + && (INTVAL (operands[3]) == 1 + || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)" + "@ + addi %2%T3,%0 + addi %2%T3,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; The mstep instruction. Probably not useful by itself; it's to +;; non-linear wrt. the other insns. We used to expand to it, so at least +;; it's correct. 
+ +(define_insn "mstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1))))] + "!TARGET_V32" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +;; When illegitimate addresses are legitimized, sometimes gcc forgets +;; to canonicalize the multiplications. +;; +;; FIXME: Check gcc > 2.7.2, remove and possibly fix in gcc. + +(define_insn "mstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2))))] + "!TARGET_V32 + && operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "mul3" + [(set (match_operand:WD 0 "register_operand" "=r") + (mult:WD + (szext:WD (match_operand: 1 "register_operand" "%0")) + (szext:WD (match_operand: 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul %2,%0" + [(set (attr "slottable") + (if_then_else (match_test "TARGET_MUL_BUG") + (const_string "no") + (const_string "yes"))) + ;; For umuls.[bwd] it's just N unusable here, but let's be safe. + ;; For muls.b, this really extends to SImode, so cc should be + ;; considered clobbered. + ;; For muls.w, it's just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; Note that gcc does not make use of such a thing as umulqisi3. It gets +;; confused and will erroneously use it instead of umulhisi3, failing (at +;; least) gcc.c-torture/execute/arith-rand.c at all optimization levels. +;; Inspection of optab code shows that there must be only one widening +;; multiplication per mode widened to. + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!muls.d %2,%0" + [(set (attr "slottable") + (if_then_else (match_test "TARGET_MUL_BUG") + (const_string "no") + (const_string "yes"))) + ;; Just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; A few multiply variations. + +;; When needed, we can get the high 32 bits from the overflow +;; register. We don't care to split and optimize these. +;; +;; Note that cc0 is still valid after the move-from-overflow-register +;; insn; no special precaution need to be taken in cris_notice_update_cc. + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "%0")) + (szext:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul.d %2,%M0\;move $mof,%H0") + +;; These two patterns may be expressible by other means, perhaps by making +;; [u]?mulsidi3 a define_expand. + +;; Due to register allocation braindamage, the clobber 1,2 alternatives +;; cause a move into the clobbered register *before* the insn, then +;; after the insn, mof is moved too, rather than the clobber assigned +;; the last mof target. 
This became apparent when making MOF and SRP +;; visible registers, with the necessary tweak to smulsi3_highpart. +;; Because these patterns are used in division by constants, that damage +;; is visible (ipps regression tests). Therefore the last two +;; alternatives, "helping" reload to avoid an unnecessary move, but +;; punished by force of one "?". Check code from "int d (int a) {return +;; a / 1000;}" and unsigned. FIXME: Comment above was for 3.2, revisit. + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=h,h,?r,?r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "r,r,0,r")) + (szext:DI (match_operand:SI 2 "register_operand" "r,r,r,0"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1,2,h,h"))] + "TARGET_HAS_MUL_INSNS" + "@ + %!mul.d %2,%1 + %!mul.d %1,%2 + %!mul.d %2,%1\;move $mof,%0 + %!mul.d %1,%2\;move $mof,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; Divide and modulus instructions. CRIS only has a step instruction. + +(define_insn "dstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1)) + (match_operand:SI 4 "register_operand" "2")) + (ashift:SI (match_operand:SI 5 "register_operand" "0") + (const_int 1))))] + "" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Here's a variant with mult instead of ashift. +;; +;; FIXME: This should be investigated. Which one matches through combination? + +(define_insn "dstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2)) + (match_operand:SI 4 "register_operand" "2")) + (mult:SI (match_operand:SI 5 "register_operand" "0") + (const_int 2))))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Logical operators. + +;; Bitwise "and". + +;; There is no use in defining "anddi3", because gcc can expand this by +;; itself, and make reasonable code without interference. + +;; If the first operand is memory or a register and is the same as the +;; second operand, and the third operand is -256 or -65536, we can use +;; CLEAR instead. Or, if the first operand is a register, and the third +;; operand is 255 or 65535, we can zero_extend. +;; GCC isn't smart enough to recognize these cases (yet), and they seem +;; to be common enough to be worthwhile. +;; FIXME: This should be made obsolete. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (! 
(CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == -65536) + && rtx_equal_p (operands[1], operands[0])) + || ((INTVAL (operands[2]) == 255 + || INTVAL (operands[2]) == 65535) + && REG_P (operands[0]))))) + { + /* Make intermediate steps if operand0 is not a register or + operand1 is not a register, and hope that the reload pass will + make something useful out of it. Note that the operands are + *not* canonicalized. For the moment, I chicken out on this, + because all or most ports do not describe 'and' with + canonicalized operands, and I seem to remember magic in reload, + checking that operand1 has constraint '%0', in which case + operand0 and operand1 must have similar predicates. + FIXME: Investigate. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (SImode, reg0, + gen_rtx_AND (SImode, reg1, operands[2]))); + + /* Make sure we get the right *final* destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some special cases of andsi3. + +(define_insn "*andsi_movu" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%r,Q,To") + (match_operand:SI 2 "const_int_operand" "n,n,n")))] + "(INTVAL (operands[2]) == 255 || INTVAL (operands[2]) == 65535) + && !side_effects_p (operands[1])" + "movu.%z2 %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andsi_clear" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,Q,Q,To,To") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0,0,0") + (match_operand:SI 2 "const_int_operand" "P,n,P,n,P,n")))] + "(INTVAL (operands[2]) == -65536 || INTVAL (operands[2]) == -256) + && !side_effects_p (operands[0])" + "@ + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "none")]) + +;; This is a catch-all pattern, taking care of everything that was not +;; matched in the insns above. +;; +;; Sidenote: the tightening from "nonimmediate_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + +(define_insn "*expanded_andsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,r") + (match_operand:SI 2 "general_operand" "I,r,Q>,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no")]) + +(define_insn "*expanded_andsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no") + (set_attr "cc" "noov32")]) + +;; For both QI and HI we may use the quick patterns. This results in +;; useless condition codes, but that is used rarely enough for it to +;; normally be a win (could check ahead for use of cc0, but seems to be +;; more pain than win). 
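;; The SImode special cases above (movu/clear) amount to these
;; identities, shown as a C sketch (function names invented); the
;; HImode patterns below use the same trick with the byte-sized
;; constants 255 and -256:
;;
;;   #include <stdint.h>
;;
;;   static uint32_t and_255 (uint32_t x)     /* movu.b: zero-extend byte */
;;   { return x & 255; }                      /* == (uint32_t) (uint8_t) x */
;;
;;   static uint32_t and_65535 (uint32_t x)   /* movu.w: zero-extend word */
;;   { return x & 65535; }                    /* == (uint32_t) (uint16_t) x */
;;
;;   static uint32_t and_m256 (uint32_t x)    /* clear.b: zero the low byte */
;;   { return x & 0xffffff00u; }
;;
;;   static uint32_t and_m65536 (uint32_t x)  /* clear.w: zero the low word */
;;   { return x & 0xffff0000u; }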
+ +;; FIXME: See note for andsi3 + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "" +{ + if (! (CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == 65280) + && rtx_equal_p (operands[1], operands[0])) + || (INTVAL (operands[2]) == 255 + && REG_P (operands[0]))))) + { + /* See comment for andsi3. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (HImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (HImode, reg0, + gen_rtx_AND (HImode, reg1, operands[2]))); + + /* Make sure we get the right destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some fast andhi3 special cases. + +(define_insn "*andhi_movu" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "r,Q,To") + (const_int 255)))] + "!side_effects_p (operands[1])" + "mOvu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andhi_clear" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,Q,To") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "0,0,0") + (const_int -256)))] + "!side_effects_p (operands[0])" + "cLear.b %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "none")]) + +;; Catch-all andhi3 pattern. + +(define_insn "*expanded_andhi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + +;; Sidenote: the tightening from "general_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + + "!TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0 + and.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*expanded_andhi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +;; A strict_low_part pattern. + +;; Note the use of (match_dup 0) for the first operand of the operation +;; here. Reload can't handle an operand pair where one is read-write +;; and must match a read, like in: +;; (insn 80 79 81 4 +;; (set (strict_low_part +;; (subreg:QI (reg/v:SI 0 r0 [orig:36 data ] [36]) 0)) +;; (and:QI +;; (subreg:QI (reg:SI 15 acr [orig:27 D.7531 ] [27]) 0) +;; (const_int -64 [0xf..fc0]))) x.c:126 147 {*andqi_lowpart_v32} +;; (nil)) +;; In theory, it could reload this as a movstrictqi of the register +;; operand at the and:QI to the destination register and change the +;; and:QI operand to the same as the read-write output operand and the +;; result would be recognized, but it doesn't recognize that's a valid +;; reload for a strict_low_part-destination; it just sees a "+" at the +;; destination constraints. 
Better than adding complexity to reload is +;; to follow the lead of m68k (see comment that begins with "These insns +;; must use MATCH_DUP") since prehistoric times and make it just a +;; match_dup. FIXME: a sanity-check in gen* to refuse an insn with +;; input-constraints matching input-output-constraints, e.g. "+r" <- "0". + +(define_insn "*andhi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r,r")) + (and:HI (match_dup 0) + (match_operand:HI 1 "general_operand" "r,Q>,g")))] + "!TARGET_V32" + "@ + and.w %1,%0 + and.w %1,%0 + and.w %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andhi_lowpart_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r,r")) + (and:HI (match_dup 0) + (match_operand:HI 1 "general_operand" "r,Q>,g")))] + "TARGET_V32" + "@ + and.w %1,%0 + and.w %1,%0 + and.w %1,%0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "register_operand") + (and:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "general_operand")))] + "" + "") + +(define_insn "*andqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0 + and.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*andqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +(define_insn "*andqi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r,r")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "!TARGET_V32" + "@ + and.b %1,%0 + and.b %1,%0 + and.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andqi_lowpart_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r,r")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "TARGET_V32" + "@ + and.b %1,%0 + and.b %1,%0 + and.b %1,%0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32")]) + +;; Bitwise or. + +;; Same comment as anddi3 applies here - no need for such a pattern. + +;; It seems there's no need to jump through hoops to get good code such as +;; with andsi3. 
+ +(define_expand "ior3" + [(set (match_operand:BWD 0 "register_operand") + (ior:BWD (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*iorsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:SI 2 "general_operand" "I, r,Q>,n,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0 + or.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,n,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0 + or.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0 + or.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +;; Exclusive-or + +;; See comment about "anddi3" for xordi3 - no need for such a pattern. +;; FIXME: Do we really need the shorter variants? + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "xor3" + [(set (match_operand:BW 0 "register_operand" "=r") + (xor:BW (match_operand:BW 1 "register_operand" "%0") + (match_operand:BW 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Negation insns. + +;; Questionable use, here mostly as a (slightly usable) define_expand +;; example. 
+ +(define_expand "negsf2" + [(set (match_dup 2) + (match_dup 3)) + (parallel [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 + "register_operand" "0"))) + (use (match_dup 2))])] + "" +{ + operands[2] = gen_reg_rtx (SImode); + operands[3] = GEN_INT (1 << 31); +}) + +(define_insn "*expanded_negsf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 "register_operand" "0"))) + (use (match_operand:SI 2 "register_operand" "r"))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes")]) + +;; No "negdi2" although we could make one up that may be faster than +;; the one in libgcc. + +(define_insn "neg2" + [(set (match_operand:BWD 0 "register_operand" "=r") + (neg:BWD (match_operand:BWD 1 "register_operand" "r")))] + "" + "neg %1,%0" + [(set_attr "slottable" "yes")]) + +;; One-complements. + +;; See comment on anddi3 - no need for a DImode pattern. +;; See also xor comment. + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "one_cmpl2" + [(set (match_operand:BW 0 "register_operand" "=r") + (not:BW (match_operand:BW 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Arithmetic/Logical shift right (and SI left). + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=r") + (shift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Kcr")))] + "" +{ + if (REG_S_P (operands[2])) + return ".d %2,%0"; + + return "q %2,%0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Since gcc gets lost, and forgets to zero-extend the source (or mask +;; the destination) when it changes shifts of lower modes into SImode, +;; it is better to make these expands an anonymous patterns instead of +;; the more correct define_insns. This occurs when gcc thinks that is +;; is better to widen to SImode and use immediate shift count. + +;; FIXME: Is this legacy or still true for gcc >= 2.7.2? + +;; FIXME: Can't parametrize sign_extend and zero_extend (before +;; mentioning "shiftrt"), so we need two patterns. 
+(define_expand "ashr3" + [(set (match_dup 3) + (sign_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (ashiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_expand "lshr3" + [(set (match_dup 3) + (zero_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (lshiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_insn "*expanded_" + [(set (match_operand:BW 0 "register_operand" "=r") + (shiftrt:BW (match_operand:BW 1 "register_operand" "0") + (match_operand:BW 2 "register_operand" "r")))] + "" + " %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "*_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (shiftrt:BW (match_dup 0) + (match_operand:BW 1 "register_operand" "r")))] + "" + " %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Arithmetic/logical shift left. + +;; For narrower modes than SI, we can use lslq although it makes cc +;; unusable. The win is that we do not have to reload the shift-count +;; into a register. + +(define_insn "ashl3" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (ashift:BW (match_operand:BW 1 "register_operand" "0,0") + (match_operand:BW 2 "nonmemory_operand" "r,Kc")))] + "" +{ + return + (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > ) + ? "moveq 0,%0" + : (CONSTANT_P (operands[2]) + ? "lslq %2,%0" : "lsl %2,%0"); +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32,clobber")]) + +;; A strict_low_part matcher. + +(define_insn "*ashl_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (ashift:BW (match_dup 0) + (match_operand:HI 1 "register_operand" "r")))] + "" + "lsl %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Various strange insns that gcc likes. + +;; Fortunately, it is simple to construct an abssf (although it may not +;; be very much used in practice). + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (abs:SF (match_operand:SF 1 "register_operand" "0")))] + "" + "lslq 1,%0\;lsrq 1,%0") + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "abs %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; FIXME: GCC should be able to do these expansions itself. 
+ +(define_expand "abs2" + [(set (match_dup 2) + (sign_extend:SI (match_operand:BW 1 "general_operand" ""))) + (set (match_dup 3) (abs:SI (match_dup 2))) + (set (match_operand:BW 0 "register_operand" "") + (subreg:BW (match_dup 3) 0))] + "" + "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);") + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_HAS_LZ" + "lz %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0")))] + "TARGET_HAS_SWAP" + "swapwb %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; This instruction swaps all bits in a register. +;; That means that the most significant bit is put in the place +;; of the least significant bit, and so on. + +(define_insn "cris_swap_bits" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0")] + CRIS_UNSPEC_SWAP_BITS))] + "TARGET_HAS_SWAP" + "swapwbr %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Implement ctz using two instructions, one for bit swap and one for clz. +;; Defines a scratch register to avoid clobbering input. + +(define_expand "ctzsi2" + [(set (match_dup 2) + (match_operand:SI 1 "register_operand")) + (set (match_dup 2) + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS)) + (set (match_operand:SI 0 "register_operand") + (clz:SI (match_dup 2)))] + "TARGET_HAS_LZ && TARGET_HAS_SWAP" + "operands[2] = gen_reg_rtx (SImode);") + +;; Bound-insn. Defined to be the same as an unsigned minimum, which is an +;; operation supported by gcc. Used in casesi, but used now and then in +;; normal code too. + +(define_expand "uminsi3" + [(set (match_operand:SI 0 "register_operand" "") + (umin:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "*uminsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0, 0,r") + (match_operand:SI 2 "general_operand" "r,Q>,g,!To")))] + "!TARGET_V32" +{ + if (CONST_INT_P (operands[2])) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + else if (which_alternative == 3) + return "bound.d %2,%1,%0"; + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*uminsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "TARGET_V32" +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,no")]) + +;; Jump and branch insns. 
+ +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "ba %l0%#" + [(set_attr "slottable" "has_slot")]) + +;; Testcase gcc.c-torture/compile/991213-3.c fails if we allow a constant +;; here, since the insn is not recognized as an indirect jump by +;; jmp_uses_reg_or_mem used by computed_jump_p. Perhaps it is a kludge to +;; change from general_operand to nonimmediate_operand (at least the docs +;; should be changed), but then again the pattern is called indirect_jump. +(define_expand "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand"))] + "" +{ + if (TARGET_V32 && MEM_P (operands[0])) + operands[0] = force_reg (SImode, operands[0]); +}) + +(define_insn "*indirect_jump_non_v32" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] + "!TARGET_V32" + "jump %0") + +(define_insn "*indirect_jump_v32" + [(set (pc) (match_operand:SI 0 "register_operand" "r"))] + "TARGET_V32" + "jump %0%#" + [(set_attr "slottable" "has_slot")]) + +;; Return insn. Used whenever the epilogue is very simple; if it is only +;; a single ret or jump [sp+]. No allocated stack space or saved +;; registers are allowed. +;; Note that for this pattern, although named, it is ok to check the +;; context of the insn in the test, not only compiler switches. + +(define_expand "return" + [(return)] + "cris_simple_epilogue ()" + "cris_expand_return (cris_return_address_on_stack ()); DONE;") + +(define_insn "*return_expanded" + [(return)] + "" +{ + return cris_return_address_on_stack_for_return () + ? "jump [$sp+]" : "ret%#"; +} + [(set (attr "slottable") + (if_then_else + (match_test "cris_return_address_on_stack_for_return ()") + (const_string "no") + (const_string "has_return_slot")))]) + +(define_expand "prologue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_prologue (); DONE;") + +;; Note that the (return) from the expander itself is always the last +;; insn in the epilogue. +(define_expand "epilogue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_epilogue (); DONE;") + +;; Conditional branches. + +(define_expand "cbranch4" + [(set (cc0) (compare + (match_operand:BWD 1 "nonimmediate_operand") + (match_operand:BWD 2 "general_operand"))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_expand "cbranchdi4" + [(set (cc0) + (compare (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + if (TARGET_V32 && !REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + + +;; We suffer from the same overflow-bit-gets-in-the-way problem as +;; e.g. m68k, so we have to check if overflow bit is set on all "signed" +;; conditions. + +(define_insn "b" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 
0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Reversed anonymous patterns to the ones above, as mandated. + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Set on condition: sCC. + +(define_expand "cstoredi4" + [(set (cc0) (compare + (match_operand:DI 2 "nonimmediate_operand") + (match_operand:DI 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" +{ + if (TARGET_V32 && !REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + if (TARGET_V32 && MEM_P (operands[3])) + operands[3] = force_reg (DImode, operands[3]); +}) + +(define_expand "cstore4" + [(set (cc0) (compare + (match_operand:BWD 2 "nonimmediate_operand") + (match_operand:BWD 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "") + +;; Like bCC, we have to check the overflow bit for +;; signed conditions. + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ncond:SI (cc0) (const_int 0)))] + "" + "s %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (rcond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "s %0" : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ocond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; Call insns. + +;; We need to make these patterns "expand", since the real operand is +;; hidden in a (mem:QI ) inside operand[0] (call_value: operand[1]), +;; and cannot be checked if it were a "normal" pattern. +;; Note that "call" and "call_value" are *always* called with a +;; mem-operand for operand 0 and 1 respective. What happens for combined +;; instructions is a different issue. + +(define_expand "call" + [(parallel [(call (match_operand:QI 0 "cris_mem_call_operand" "") + (match_operand 1 "general_operand" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + if (flag_pic) + cris_expand_pic_call_address (&operands[0]); +}) + +;; Accept *anything* as operand 1. Accept operands for operand 0 in +;; order of preference. 
+ +(define_insn "*expanded_call_non_v32" + [(call (mem:QI (match_operand:SI 0 "general_operand" "r,Q>,g")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "jsr %0") + +(define_insn "*expanded_call_v32" + [(call + (mem:QI + (match_operand:SI 0 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "TARGET_V32" + "@ + jsr %0%# + jsr %0%# + bsr %0%# + bsr %0%#" + [(set_attr "slottable" "has_call_slot")]) + +;; Parallel when calculating and reusing address of indirect pointer +;; with simple offset. (Makes most sense with PIC.) It looks a bit +;; wrong not to have the clobber last, but that's the way combine +;; generates it (except it doesn' look into the *inner* mem, so this +;; just matches a peephole2). FIXME: investigate that. +(define_insn "*expanded_call_side" + [(call (mem:QI + (mem:SI + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 2 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 3 "register_operand" "=*0,r,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "jsr [%3=%0%S1]") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "cris_mem_call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + if (flag_pic) + cris_expand_pic_call_address (&operands[1]); +}) + +;; Accept *anything* as operand 2. The validity other than "general" of +;; operand 0 will be checked elsewhere. Accept operands for operand 1 in +;; order of preference (Q includes r, but r is shorter, faster). +;; We also accept a PLT symbol. We output it as [rPIC+sym:GOTPLT] rather +;; than requiring getting rPIC + sym:PLT into a register. + +(define_insn "*expanded_call_value_non_v32" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call (mem:QI (match_operand:SI 1 "general_operand" "r,Q>,g")) + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "Jsr %1" + [(set_attr "cc" "clobber")]) + +;; See similar call special-case. +(define_insn "*expanded_call_value_side" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call + (mem:QI + (mem:SI + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 3 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 4 "register_operand" "=*1,r,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "Jsr [%4=%1%S2]" + [(set_attr "cc" "clobber")]) + +(define_insn "*expanded_call_value_v32" + [(set + (match_operand 0 "nonimmediate_operand" "=g,g,g,g") + (call + (mem:QI + (match_operand:SI 1 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 2 "" ""))) + (clobber (reg:SI 16))] + "TARGET_V32" + "@ + Jsr %1%# + Jsr %1%# + Bsr %1%# + Bsr %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_call_slot")]) + +;; Used in debugging. No use for the direct pattern; unfilled +;; delayed-branches are taken care of by other means. + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none")]) + +;; Same as the gdb trap breakpoint, will cause a SIGTRAP for +;; cris-linux* and crisv32-linux*, as intended. Will work in +;; freestanding environments with sufficient framework. 
+(define_insn "trap" + [(trap_if (const_int 1) (const_int 8))] + "TARGET_TRAP_USING_BREAK8" + "break 8") + +;; We need to stop accesses to the stack after the memory is +;; deallocated. Unfortunately, reorg doesn't look at naked clobbers, +;; e.g. (insn ... (clobber (mem:BLK (stack_pointer_rtx)))) and we don't +;; want to use a naked (unspec_volatile) as that would stop any +;; scheduling in the epilogue. Hence we model it as a "real" insn that +;; sets the memory in an unspecified manner. FIXME: Unfortunately it +;; still has the effect of an unspec_volatile. +(define_insn "cris_frame_deallocated_barrier" + [(set (mem:BLK (reg:SI CRIS_SP_REGNUM)) + (unspec:BLK [(const_int 0)] CRIS_UNSPEC_FRAME_DEALLOC))] + "" + "" + [(set_attr "length" "0")]) + +;; We expand on casesi so we can use "bound" and "add offset fetched from +;; a table to pc" (adds.w [pc+%0.w],pc). + +;; Note: if you change the "parallel" (or add anything after it) in +;; this expansion, you must change the macro ASM_OUTPUT_CASE_END +;; accordingly, to add the default case at the end of the jump-table. + +(define_expand "cris_casesi_non_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand" "")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand" "n"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand" "n"))) + (parallel + [(set (pc) + (if_then_else + (ltu (match_dup 7) (match_dup 2)) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_operand 3 "" "")))])] + "" +{ + operands[2] = plus_constant (SImode, operands[2], 1); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = gen_reg_rtx (SImode); +}) + +;; FIXME: Check effect of not JUMP_TABLES_IN_TEXT_SECTION. +(define_expand "cris_casesi_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand"))) + (set (match_dup 8) (match_dup 11)) + (set (match_dup 9) + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (match_dup 8))) + (set (match_dup 10) + (plus:SI (sign_extend:SI (mem:HI (match_dup 9))) + (match_dup 9))) + (parallel + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) (match_dup 2)) + (match_dup 10) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_dup 3)))])] + "TARGET_V32" +{ + int i; + rtx xlabel = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + for (i = 5; i <= 10; i++) + operands[i] = gen_reg_rtx (SImode); + operands[2] = plus_constant (SImode, operands[2], 1); + + /* Don't forget to decorate labels too, for PIC. */ + operands[11] = flag_pic + ? gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xlabel), + CRIS_UNSPEC_PCREL)) + : xlabel; +}) + +(define_expand "casesi" + [(match_operand:SI 0 "general_operand") + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand") + (match_operand 3 "" "") + (match_operand 4 "" "")] + "" +{ + if (TARGET_V32) + emit_insn (gen_cris_casesi_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + else + emit_insn (gen_cris_casesi_non_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + DONE; +}) + +;; Split-patterns. Some of them have modes unspecified. 
This +;; should always be ok; if for no other reason sparc.md has it as +;; well. +;; +;; When register_operand is specified for an operand, we can get a +;; subreg as well (Axis-990331), so don't just assume that REG_P is true +;; for a register_operand and that REGNO can be used as is. It is best to +;; guard with REG_P, unless it is worth it to adjust for the subreg case. + +;; op [rx + 0],ry,rz +;; The index to rx is optimized into zero, and gone. + +;; First, recognize bound [rx],ry,rz; where [rx] is zero-extended, +;; and add/sub [rx],ry,rz, with zero or sign-extend on [rx]. +;; Split this into: +;; move ry,rz +;; op [rx],rz +;; Lose if rz=ry or rx=rz. +;; Call this op-extend-split. +;; Do not match for V32; the addo and addi shouldn't be split +;; up. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split, but recognize and split op [rz],ry,rz into +;; ext [rz],rz +;; op ry,rz +;; Do this for plus or bound only, being commutative operations, since we +;; have swapped the operands. +;; Call this op-extend-split-rx=rz + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As the op-extend-split, but swapped operands, and only for +;; plus or bound, being the commutative extend-operators. FIXME: Why is +;; this needed? Is it? +;; Call this op-extend-split-swapped + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split-rx=rz, but swapped operands, only for plus or +;; bound. Call this op-extend-split-swapped-rx=rz. 
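
Illustration only, not part of the patch: a stand-alone C model of what the op-extend-split family above does, ignoring the zero/sign extension of the memory operand. Registers are modelled as array indices; the point is that "op [rx],ry,rz" and the split "move ry,rz; op [rx],rz" agree exactly when rz is distinct from both ry and rx, which is why the splits are guarded against those cases.

#include <stdio.h>

/* Stand-alone model, not part of the port: op stands in for add/bound
   and register "names" are just indices.  The split is only valid when
   rz != ry and rz != rx, because the initial move overwrites rz.  */
static int op (int a, int b) { return a + b; }

int main (void)
{
  int regs[3];
  int mem[1] = { 7 };
  int rx = 0, ry = 1, rz = 2;

  regs[rx] = 0;      /* rx holds an address; here, index 0 of mem */
  regs[ry] = 5;

  int direct = op (mem[regs[rx]], regs[ry]);   /* op [rx],ry,rz */

  regs[rz] = regs[ry];                         /* move ry,rz    */
  regs[rz] = op (mem[regs[rx]], regs[rz]);     /* op [rx],rz    */

  printf ("%d %d\n", direct, regs[rz]);        /* both print 12 */
  return 0;
}
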
+ +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split, but the mem operand is not extended. +;; +;; op [rx],ry,rz changed into +;; move ry,rz +;; op [rx],rz +;; lose if ry=rz or rx=rz +;; Call this op-extend. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-rx=rz, non-extended. +;; Call this op-split-rx=rz + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-swapped, nonextended. +;; Call this op-split-swapped. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split-swapped-rx=rz, non-extended. +;; Call this op-split-swapped-rx=rz. 
+ +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +(include "sync.md") + +;; Splits for all cases in side-effect insns where (possibly after reload +;; and register allocation) rx and ry in [rx=ry+i] are equal. + +;; move.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" ""))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_dup 5))] + "operands[5] = replace_equiv_address (operands[6], operands[3]);") + +;; move.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_mem_op" + [(plus:SI (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" ""))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_dup 4))] +{ + operands[4] = replace_equiv_address (operands[5], operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; move.S1 ry,[rx=rx+rz.S2] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[4]) + && REGNO (operands[4]) == REGNO (operands[2])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (match_dup 5) (match_dup 3))] + "operands[5] = replace_equiv_address (operands[6], operands[4]);") + +;; move.S1 ry,[rx=rx+i] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]))" + [(set (match_dup 3) (plus:SI (match_dup 0) (match_dup 1))) + (set (match_dup 5) (match_dup 2))] +{ + operands[5] = replace_equiv_address (operands[6], operands[3]); + cris_order_for_addsi3 (operands, 0); +}) + +;; clear.[bwd] [rx=rx+rz.S2] + +(define_split + [(parallel + [(set 
(mem:BWD (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[3]) + && REGNO (operands[3]) == REGNO (operands[2])" + [(set (match_dup 3) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:BWD (match_dup 3)) (const_int 0))] + "") + +;; clear.[bwd] [rx=rx+i] + +(define_split + [(parallel + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[2], operands[1]))" + [(set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1))) + (set (mem:BWD (match_dup 2)) (const_int 0))] + "cris_order_for_addsi3 (operands, 0);") + +;; mov(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) + && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6)]))] + "operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]);") + +;; mov(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" "")))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_op_dup 4 [(match_dup 5)]))] +{ + operands[5] = replace_equiv_address (XEXP (operands[4], 0), operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; op.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 6)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 1), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + 
(match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 7)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 1), operands[5]);") + +;; op.S1 [rx=rx+rz.S2],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_commutative_orth_op" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 0), operands[5]);") + +;; op.S1 [rx=rx+i],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_commutative_orth_op" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6) (match_dup 1)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 7 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 8)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[7]), GET_MODE (operands[7]), + replace_equiv_address (XEXP (operands[7], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 6 "cris_extend_operator" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "") + ))])])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + 
"(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 7)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 7 "cris_plus_or_bound_operator" + [(match_operator + 6 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 8) (match_dup 1)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_plus_or_bound_operator" + [(match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[5]), GET_MODE (operands[5]), + replace_equiv_address (XEXP (operands[5], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; Splits for addressing prefixes that have no side-effects, so we can +;; fill a delay slot. Never split if we lose something, though. + +;; If we have a +;; move [indirect_ref],rx +;; where indirect ref = {const, [r+], [r]}, it costs as much as +;; move indirect_ref,rx +;; move [rx],rx +;; Take care not to allow indirect_ref = register. + +;; We're not allowed to generate copies of registers with different mode +;; until after reload; copying pseudos upsets reload. CVS as of +;; 2001-08-24, unwind-dw2-fde.c, _Unwind_Find_FDE ICE in +;; cselib_invalidate_regno. Also, don't do this for the stack-pointer, +;; as we don't want it set temporarily to an invalid value. 
+ +(define_split ; indir_to_reg_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operand 1 "indirect_operand" ""))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) || CONSTANT_P (XEXP (operands[1], 0))) + && REGNO (operands[0]) < CRIS_LAST_GENERAL_REGISTER" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 0) (match_dup 3))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (operands[1], operands[2]); + operands[4] = XEXP (operands[1], 0);") + +;; As the above, but MOVS and MOVU. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(match_operand 1 "indirect_operand" "")]))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) + || CONSTANT_P (XEXP (operands[1], 0)))" + [(set (match_dup 2) (match_dup 5)) + (set (match_dup 0) (match_op_dup 4 [(match_dup 3)]))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (XEXP (operands[4], 0), operands[2]); + operands[5] = XEXP (operands[1], 0);") + +;; Various peephole optimizations. +;; +;; Watch out: when you exchange one set of instructions for another, the +;; condition codes setting must be the same, or you have to CC_INIT or +;; whatever is appropriate, in the pattern before you emit the +;; assembly text. This is best done here, not in cris_notice_update_cc, +;; to keep changes local to their cause. +;; +;; Do not add patterns that you do not know will be matched. +;; Please also add a self-contained testcase. + +;; We have trouble with and:s and shifts. Maybe something is broken in +;; gcc? Or it could just be that bit-field insn expansion is a bit +;; suboptimal when not having extzv insns. +;; Testcase for the following four peepholes: gcc.dg/cris-peep2-xsrand.c + +(define_peephole2 ; asrandb (peephole casesi+31) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) + (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23 + /* Check that the and-operation enables us to use logical-shift. */ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; asrandw (peephole casesi+32) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15 + /* Check that the and-operation enables us to use logical-shift. 
*/ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + +(define_peephole2 ; lsrandb (peephole casesi+33) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; lsrandw (peephole casesi+34) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + + +;; Change +;; add.d n,rx +;; move [rx],ry +;; into +;; move [rx=rx+n],ry +;; when -128 <= n <= 127. +;; This will reduce the size of the assembler code for n = [-128..127], +;; and speed up accordingly. Don't match if the previous insn is +;; (set rx rz) because that combination is matched by another peephole. +;; No stable test-case. + +(define_peephole2 ; moversideqi (peephole casesi+35) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_mem_op" [(match_dup 0)]))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[3]) != REGNO (operands[0]) + && (cris_base_p (operands[1], true) || cris_base_p (operands[2], true)) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + ;; Checking the previous insn is a bit too awkward for the condition. 
+{ + rtx prev = prev_nonnote_insn (curr_insn); + if (prev != NULL_RTX) + { + rtx set = single_set (prev); + if (set != NULL_RTX + && REG_S_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) == REGNO (operands[0]) + && REG_S_P (SET_SRC (set))) + FAIL; + } + operands[5] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[1], operands[2])); +}) + +;; Vice versa: move ry,[rx=rx+n] + +(define_peephole2 ; movemsideqi (peephole casesi+36) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operator 3 "cris_mem_op" [(match_dup 0)]) + (match_operand 4 "register_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[4]) != REGNO (operands[0]) + && (cris_base_p (operands[1], true) || cris_base_p (operands[2], true)) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[5] + = replace_equiv_address (operands[3], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; As above, change: +;; add.d n,rx +;; op.d [rx],ry +;; into: +;; op.d [rx=rx+n],ry +;; Saves when n = [-128..127]. +;; +;; Splitting and joining combinations for side-effect modes are slightly +;; out of hand. They probably will not save the time they take typing in, +;; not to mention the bugs that creep in. FIXME: Get rid of as many of +;; the splits and peepholes as possible. +;; No stable test-case. + +(define_peephole2 ; mover2side (peephole casesi+37) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_orthogonal_operator" + [(match_dup 3) + (match_operator + 5 "cris_mem_op" [(match_dup 0)])]))] + ;; FIXME: What about DFmode? + ;; Change to GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD? + "GET_MODE (operands[3]) != DImode + && REGNO (operands[0]) != REGNO (operands[3]) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && INTVAL (operands[2]) >= -128 + && INTVAL (operands[2]) <= 127 + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_op_dup 4 [(match_dup 3) (match_dup 6)])) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; Sometimes, for some reason the pattern +;; move x,rx +;; add y,rx +;; move [rx],rz +;; will occur. Solve this, and likewise for to-memory. +;; No stable test-case. + +(define_peephole2 ; moverside (peephole casesi+38) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operand 4 "register_operand" "") + (match_operator 5 "cris_mem_op" [(match_dup 0)]))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + (! REG_S_P (operands[1]) + ? 
1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 4)" + [(parallel + [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; As above but to memory. +;; FIXME: Split movemside and moverside into variants and prune +;; the ones that don't trig. +;; No stable test-case. + +(define_peephole2 ; movemside (peephole casesi+39) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operator 4 "cris_mem_op" [(match_dup 0)]) + (match_operand 5 "register_operand" ""))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + (! REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 5)" + [(parallel + [(set (match_dup 6) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; Another spotted bad code: +;; move rx,ry +;; move [ry],ry +;; No stable test-case. + +(define_peephole2 ; movei (peephole casesi+42) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand 2 "register_operand" "") + (match_operator 3 "cris_mem_op" [(match_dup 0)]))] + "REGNO (operands[0]) == REGNO (operands[2]) + && (REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[1]))) + && GET_MODE_SIZE (GET_MODE (operands[2])) <= UNITS_PER_WORD" + [(set (match_dup 2) (match_dup 4))] + "operands[4] = replace_equiv_address (operands[3], operands[1]);") + +;; move.d [r10+16],r9 +;; and.d r12,r9 +;; change to +;; and.d [r10+16],r12,r9 +;; With generalization of the operation, the size and the addressing mode. +;; This seems to be the result of a quirk in register allocation +;; missing the three-operand cases when having different predicates. +;; Maybe that it matters that it is a commutative operation. +;; This pattern helps that situation, but there's still the increased +;; register pressure. +;; Note that adding the noncommutative variant did not show any matches +;; in ipps and cc1, so it's not here. +;; No stable test-case. 
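
Illustration only, not part of the patch: source of the kind that can produce the "move.d [r10+16],r9; and.d r12,r9" sequence discussed above; the op3 peephole that follows folds it into the three-operand "and.d [r10+16],r12,r9" form when register allocation allows. The register names are just the ones quoted in the comment, used here hypothetically.

#include <stdio.h>

/* Illustration only: a load at a small constant offset followed by an
   AND with another register, the shape the op3 peephole above targets.  */
static int mask_word (int *p, int mask)
{
  return p[4] & mask;   /* load from p+16 bytes, then AND with mask */
}

int main (void)
{
  int words[8] = { 0, 0, 0, 0, 0xff0f, 0, 0, 0 };
  printf ("0x%x\n", mask_word (words, 0x00ff));   /* prints 0xf */
  return 0;
}
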
+ +(define_peephole2 ; op3 (peephole casesi+44) + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (match_operand:SI 1 "cris_bdap_biap_operand" "") + (match_operand:SI 2 "cris_bdap_biap_operand" ""))])) + (set (match_dup 0) + (match_operator + 5 "cris_commutative_orth_op" + [(match_operand 3 "register_operand" "") + (match_operand 4 "register_operand" "")]))] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[4], operands[0])) + && ! rtx_equal_p (operands[3], operands[4]) + && (REG_S_P (operands[1]) || REG_S_P (operands[2])) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD" + [(set (match_dup 0) (match_op_dup 5 [(match_dup 7) (match_dup 6)]))] + "operands[7] + = rtx_equal_p (operands[3], operands[0]) ? operands[4] : operands[3];") + +;; There seems to be no other way to make GCC (including 4.8/trunk at +;; r186932) optimally reload an instruction that looks like +;; and.d reg_or_mem,const_32__65535,other_reg +;; where other_reg is the destination. +;; It should be: +;; movu.[bw] reg_or_mem,reg_32 +;; and.[bw] trunc_int_for_mode([bw], const_32__65535),reg_32 ;; or andq +;; but it turns into: +;; move.d reg_or_mem,reg_32 +;; and.d const_32__65535,reg_32 +;; Fix it with these two peephole2's. +;; Testcases: gcc.dg/cris-peep2-andu1.c gcc.dg/cris-peep2-andu2.c + +(define_peephole2 ; andu (casesi+45) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonimmediate_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (and:SI (match_dup 0) + (match_operand:SI 3 "const_int_operand" "")))] + ;; Since the size of the memory access could be made different here, + ;; don't do this for a mem-volatile access. + "REGNO (operands[2]) == REGNO (operands[0]) + && INTVAL (operands[3]) <= 65535 && INTVAL (operands[3]) >= 0 + && !satisfies_constraint_I (operands[3]) + && !side_effects_p (operands[1]) + && (!REG_P (operands[1]) + || REGNO (operands[1]) <= CRIS_LAST_GENERAL_REGISTER)" + ;; FIXME: CC0 valid except for M (i.e. CC_NOT_NEGATIVE). + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] +{ + enum machine_mode zmode = INTVAL (operands[3]) <= 255 ? QImode : HImode; + enum machine_mode amode + = satisfies_constraint_O (operands[3]) ? SImode : zmode; + rtx op1 + = (REG_S_P (operands[1]) + ? gen_rtx_REG (zmode, REGNO (operands[1])) + : adjust_address (operands[1], zmode, 0)); + operands[4] + = gen_rtx_ZERO_EXTEND (SImode, op1); + operands[5] = gen_rtx_REG (amode, REGNO (operands[0])); + operands[6] + = gen_rtx_AND (amode, gen_rtx_REG (amode, REGNO (operands[0])), + GEN_INT (trunc_int_for_mode (INTVAL (operands[3]), + amode == SImode + ? QImode : amode))); +}) + +;; Since r186861, gcc.dg/cris-peep2-andu2.c trigs this pattern, with which +;; we fix up e.g.: +;; movu.b 254,$r9. +;; and.d $r10,$r9 +;; into: +;; movu.b $r10,$r9 +;; andq -2,$r9. +;; Only do this for values fitting the quick immediate operand. +(define_peephole2 ; andqu (casesi+46) + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "const_int_operand")) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand:SI 2 "nonimmediate_operand")))] + ;; Since the size of the memory access will be made different here, + ;; don't do this for a volatile access or a post-incremented address. 
+ "satisfies_constraint_O (operands[1]) + && !side_effects_p (operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (and:SI (match_dup 0) (match_dup 4)))] +{ + enum machine_mode zmode = INTVAL (operands[2]) <= 255 ? QImode : HImode; + rtx op1 + = (REG_S_P (operands[2]) + ? gen_rtx_REG (zmode, REGNO (operands[2])) + : adjust_address (operands[2], zmode, 0)); + operands[3] = gen_rtx_ZERO_EXTEND (SImode, op1); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode)); +}) + +;; Try and avoid GOTPLT reads escaping a call: transform them into +;; PLT. Curiously (but thankfully), peepholes for instructions +;; *without side-effects* that just feed a call (or call_value) are +;; not matched neither in a build or test-suite, so those patterns are +;; omitted. + +;; A "normal" move where we don't check the consumer. + +(define_peephole2 ; gotplt-to-plt + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))]))] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 0) (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; And one set with a side-effect getting the PLTGOT offset. +;; First call and call_value variants. + +(define_peephole2 ; gotplt-to-plt-side-call + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(call (mem:QI (match_dup 0)) + (match_operand 4 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(call (mem:QI (match_dup 1)) + (match_dup 4)) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; gotplt-to-plt-side-call-value + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(set (match_operand 5 "" "") + (call (mem:QI (match_dup 0)) + (match_operand 4 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 5) + (call (mem:QI (match_dup 1)) + (match_dup 4))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; 
gotplt-to-plt-side + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 3) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))) + (set (match_dup 3) (plus:SI (match_dup 3) (reg:SI CRIS_GOT_REGNUM))) + (set (match_dup 0) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; Local variables: +;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (copy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc-4.9/gcc/config/cris/cris.opt b/gcc-4.9/gcc/config/cris/cris.opt new file mode 100644 index 000000000..d359c8948 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.opt @@ -0,0 +1,202 @@ +; Options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; TARGET_MUL_BUG: Whether or not to work around multiplication +; instruction hardware bug when generating code for models where +; it may be present. From the trouble report for Etrax 100 LX: +; "A multiply operation may cause incorrect cache behaviour +; under some specific circumstances. The problem can occur if +; the instruction following the multiply instruction causes a +; cache miss, and multiply operand 1 (source operand) bits +; [31:27] matches the logical mapping of the mode register +; address (0xb0....), and bits [9:2] of operand 1 matches the +; TLB register address (0x258-0x25f). There is such a mapping +; in kernel mode or when the MMU is off. Normally there is no +; such mapping in user mode, and the problem will therefore +; probably not occur in Linux user mode programs." +; +; We have no sure-fire way to know from within GCC that we're +; compiling a user program. For example, -fpic/PIC is used in +; libgcc which is linked into the kernel. However, the +; workaround option -mno-mul-bug can be safely used per-package +; when compiling programs. The same goes for general user-only +; libraries such as glibc, since there's no user-space +; driver-like program that gets a mapping of I/O registers (all +; on the same page, including the TLB registers). 
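
Illustration only, not part of the patch: a rough, host-runnable reading of the operand-1 condition quoted from the trouble report above, just to make the bit arithmetic concrete. The field positions and I/O addresses are taken from that wording, not from hardware documentation, so treat it as a sketch of one interpretation.

#include <stdint.h>
#include <stdio.h>

/* Sketch only: one reading of the Etrax 100 LX trouble report quoted
   above.  Operand 1 of a multiply is suspect when its bits [31:27]
   match the mode-register mapping (0xb0000000) and its bits [9:2]
   fall in the TLB register range 0x258-0x25f.  */
static int mul_operand_matches_bug_pattern (uint32_t op1)
{
  int high_bits_match = ((op1 >> 27) & 0x1f) == (0xb0000000u >> 27);
  int tlb_bits_match  = ((op1 >> 2) & 0xff) >= (0x258u >> 2)
			&& ((op1 >> 2) & 0xff) <= (0x25fu >> 2);
  return high_bits_match && tlb_bits_match;
}

int main (void)
{
  printf ("%d\n", mul_operand_matches_bug_pattern (0xb0000258u));  /* 1 */
  printf ("%d\n", mul_operand_matches_bug_pattern (0x00000258u));  /* 0 */
  return 0;
}
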
+mmul-bug-workaround +Target Report Mask(MUL_BUG) +Work around bug in multiplication instruction + +; TARGET_ETRAX4_ADD: Instruction-set additions from Etrax 4 and up. +; (Just "lz".) +metrax4 +Target Report Mask(ETRAX4_ADD) +Compile for ETRAX 4 (CRIS v3) + +; See cris_handle_option. +metrax100 +Target Report RejectNegative +Compile for ETRAX 100 (CRIS v8) + +; See cris_handle_option. +mno-etrax100 +Target Report RejectNegative Undocumented + +mpdebug +Target Report Mask(PDEBUG) +Emit verbose debug information in assembly code + +; TARGET_CCINIT: Whether to use condition-codes generated by +; insns other than the immediately preceding compare/test insn. +; Used to check for errors in notice_update_cc. +mcc-init +Target Report Mask(CCINIT) +Do not use condition codes from normal instructions + +; TARGET_SIDE_EFFECT_PREFIXES: Whether to use side-effect +; patterns. Used to debug the [rx=ry+i] type patterns. +mside-effects +Target Report RejectNegative Mask(SIDE_EFFECT_PREFIXES) Undocumented + +mno-side-effects +Target Report RejectNegative InverseMask(SIDE_EFFECT_PREFIXES) +Do not emit addressing modes with side-effect assignment + +; TARGET_STACK_ALIGN: Whether to *keep* (not force) alignment of +; stack at 16 (or 32, depending on TARGET_ALIGN_BY_32) bits. +mstack-align +Target Report RejectNegative Mask(STACK_ALIGN) Undocumented + +mno-stack-align +Target Report RejectNegative InverseMask(STACK_ALIGN) +Do not tune stack alignment + +; TARGET_DATA_ALIGN: Whether to do alignment on individual +; modifiable objects. +mdata-align +Target Report RejectNegative Mask(DATA_ALIGN) Undocumented + +mno-data-align +Target Report RejectNegative InverseMask(DATA_ALIGN) +Do not tune writable data alignment + +; TARGET_CONST_ALIGN: Whether to do alignment on individual +; non-modifiable objects. +mconst-align +Target Report RejectNegative Mask(CONST_ALIGN) Undocumented + +mno-const-align +Target Report RejectNegative InverseMask(CONST_ALIGN) +Do not tune code and read-only data alignment + +; See cris_handle_option. +m32-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m32bit +Target Report RejectNegative +Align code and data to 32 bits + +; See cris_handle_option. +m16-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m16bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8bit +Target Report RejectNegative +Don't align items in code or data + +; TARGET_PROLOGUE_EPILOGUE: Whether or not to omit function +; prologue and epilogue. +mprologue-epilogue +Target Report RejectNegative Mask(PROLOGUE_EPILOGUE) Undocumented + +mno-prologue-epilogue +Target Report RejectNegative InverseMask(PROLOGUE_EPILOGUE) +Do not emit function prologue or epilogue + +; We have to handle this m-option here since we can't wash it +; off in both CC1_SPEC and CC1PLUS_SPEC. + +mbest-lib-options +Target Report RejectNegative +Use the most feature-enabling options allowed by other options + +; FIXME: The following comment relates to gcc before cris.opt. +; Check if it's still valid: +; We must call it "override-" since calling it "no-" will cause +; gcc.c to forget it, if there's a "later" -mbest-lib-options. +; Kludgy, but needed for some multilibbed files. 
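
Illustration only, not part of the patch: a much-simplified stand-alone model of how the Mask()/InverseMask() option pairs above behave. Each pair just sets or clears one bit in target_flags, so the last -m option given on the command line wins. This is a sketch of the idea, not the real GCC option machinery.

#include <stdio.h>
#include <string.h>

/* Simplified model only: one bit per Mask(), cleared by the matching
   InverseMask() option.  */
enum { MASK_SIDE_EFFECT_PREFIXES = 1 << 0 };

static unsigned target_flags;

static void handle_option (const char *opt)
{
  if (!strcmp (opt, "-mside-effects"))
    target_flags |= MASK_SIDE_EFFECT_PREFIXES;    /* Mask(...)        */
  else if (!strcmp (opt, "-mno-side-effects"))
    target_flags &= ~MASK_SIDE_EFFECT_PREFIXES;   /* InverseMask(...) */
}

int main (void)
{
  handle_option ("-mside-effects");
  handle_option ("-mno-side-effects");   /* the later option overrides */
  printf ("side effects enabled: %d\n",
	  (target_flags & MASK_SIDE_EFFECT_PREFIXES) != 0);
  return 0;
}
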
+moverride-best-lib-options +Target Report RejectNegative +Override -mbest-lib-options + +mcpu= +Target Report RejectNegative Joined Undocumented Var(cris_cpu_str) + +march= +Target Report RejectNegative Joined Var(cris_cpu_str) +-march=ARCH Generate code for the specified chip or CPU version + +mtune= +Target Report RejectNegative Joined Var(cris_tune_str) +-mtune=ARCH Tune alignment for the specified chip or CPU version + +mmax-stackframe= +Target Report RejectNegative Joined Var(cris_max_stackframe_str) +-mmax-stackframe=SIZE Warn when a stackframe is larger than the specified size + +max-stackframe= +Target Report RejectNegative Joined Undocumented Var(cris_max_stackframe_str) + +mtrap-using-break8 +Target Report Var(cris_trap_using_break8) Init(2) +Emit traps as \"break 8\", default for CRIS v3 and up. If disabled, calls to abort() are used. + +mtrap-unaligned-atomic +Target Report Var(cris_trap_unaligned_atomic) Init(2) +Emit checks causing \"break 8\" instructions to execute when applying atomic builtins on misaligned memory + +munaligned-atomic-may-use-library +Target Report Var(cris_atomics_calling_libfunc) Init(2) +Handle atomic builtins that may be applied to unaligned data by calling library functions. Overrides -mtrap-unaligned-atomic. + +; TARGET_SVINTO: Currently this just affects alignment. FIXME: +; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here? +; This and the others below could just as well be variables and +; TARGET_* defines in cris.h. +Mask(SVINTO) + +; TARGET_ALIGN_BY_32: Say that all alignment specifications say +; to prefer 32 rather than 16 bits. +Mask(ALIGN_BY_32) + +; TARGET_AVOID_GOTPLT is referred to in the .c and the .md so we +; need to allocate the flag and macros here. +Mask(AVOID_GOTPLT) diff --git a/gcc-4.9/gcc/config/cris/elf.opt b/gcc-4.9/gcc/config/cris/elf.opt new file mode 100644 index 000000000..f759d0173 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/elf.opt @@ -0,0 +1,25 @@ +; ELF-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +melf +Target Report RejectNegative Undocumented + +sim +Driver JoinedOrMissing diff --git a/gcc-4.9/gcc/config/cris/linux.h b/gcc-4.9/gcc/config/cris/linux.h new file mode 100644 index 000000000..af27e1089 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/linux.h @@ -0,0 +1,150 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* This file defines the macros for cris-axis-linux-gnu that are not + covered by cris.h, elfos.h and (config/)linux.h. */ + +/* Make sure we have a valid TARGET_CPU_DEFAULT, so we can assume it + and take shortcuts below. */ +#ifndef TARGET_CPU_DEFAULT +#error "TARGET_CPU_DEFAULT not defined" +#elif (TARGET_CPU_DEFAULT+0) != 10 && (TARGET_CPU_DEFAULT+0) != 32 +#error "TARGET_CPU_DEFAULT must be 10 or 32, or this file be updated" +#endif + +/* Node: Instruction Output */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* Node: Driver */ +/* These macros are CRIS-specific, but used in target driver macros. */ + +#undef CRIS_CPP_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v32 -D__CRIS_arch_version=32}}" +#else +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v10 -D__CRIS_arch_version=10}}" +#endif + +#undef CRIS_CC1_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v32}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH MASK_AVOID_GOTPLT +#else +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v10}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH 0 +#endif + +#undef CRIS_ASM_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v32}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#else +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v10}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#endif + +/* Previously controlled by target_flags. */ +#undef TARGET_LINUX +#define TARGET_LINUX 1 + +#undef CRIS_SUBTARGET_DEFAULT +#define CRIS_SUBTARGET_DEFAULT \ + (MASK_SVINTO \ + + MASK_ETRAX4_ADD \ + + MASK_ALIGN_BY_32 \ + + CRIS_SUBTARGET_DEFAULT_ARCH) + +#undef CRIS_DEFAULT_CPU_VERSION +#define CRIS_DEFAULT_CPU_VERSION CRIS_CPU_NG + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef CRIS_LINK_SUBTARGET_SPEC +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcrislinux\ + %{shared} %{static}\ + %{symbolic:-Bdynamic} %{static:-Bstatic}\ + %{!shared:%{!static:\ + %{rdynamic:-export-dynamic}\ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}\ + %{!r:%{O2|O3: --gc-sections}}" + + +/* Node: Run-time Target */ + +/* For the cris-*-linux* subtarget. 
*/ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + if (flag_leading_underscore <= 0) \ + builtin_define ("__NO_UNDERSCORES__"); \ + } \ + while (0) + +/* Node: Type Layout */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Node: Sections */ + +/* GNU/Linux has crti and crtn and does not need the + CRT_CALL_STATIC_FUNCTION trick in cris.h. */ +#undef CRT_CALL_STATIC_FUNCTION + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/linux.opt b/gcc-4.9/gcc/config/cris/linux.opt new file mode 100644 index 000000000..b5a19e9ad --- /dev/null +++ b/gcc-4.9/gcc/config/cris/linux.opt @@ -0,0 +1,33 @@ +; GNU/Linux-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Provide a legacy -mlinux option. +mlinux +Target Report RejectNegative Undocumented + +mno-gotplt +Target Report RejectNegative Mask(AVOID_GOTPLT) +Together with -fpic and -fPIC, do not use GOTPLT references + +; There's a small added setup cost with using GOTPLT references +; for the first (resolving) call, but should in total be a win +; both in code-size and execution-time. +mgotplt +Target Report RejectNegative InverseMask(AVOID_GOTPLT) Undocumented diff --git a/gcc-4.9/gcc/config/cris/predicates.md b/gcc-4.9/gcc/config/cris/predicates.md new file mode 100644 index 000000000..0169b0b71 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/predicates.md @@ -0,0 +1,178 @@ +;; Operand and operator predicates for the GCC CRIS port. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Operator predicates. + +(define_predicate "cris_orthogonal_operator" + (match_code "plus, minus, ior, and, umin")) + +(define_predicate "cris_commutative_orth_op" + (match_code "plus, ior, and, umin")) + +;; By the name, you might think we should include MULT. We don't because +;; it doesn't accept the same addressing modes as the others (only +;; registers) and there's also the problem of handling TARGET_MUL_BUG. 
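To see that distinction at the source level, here is a hedged C sketch (the functions accumulate and scale are invented for illustration): the operators listed as "orthogonal" above can take one operand straight from memory, while a multiply needs both operands in registers first.

   int
   accumulate (int acc, const int *p)
   {
     /* plus with a memory source: the load can fold into the add,
        which is why plus belongs in cris_orthogonal_operator.  */
     return acc + *p;
   }

   int
   scale (int acc, const int *p)
   {
     /* mult accepts only register operands, so *p is loaded first;
        the TARGET_MUL_BUG handling noted above is the other complication.  */
     return acc * *p;
   }
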
+ +(define_predicate "cris_operand_extend_operator" + (match_code "plus, minus, umin")) + +(define_predicate "cris_additive_operand_extend_operator" + (match_code "plus, minus")) + +(define_predicate "cris_extend_operator" + (match_code "zero_extend, sign_extend")) + +(define_predicate "cris_plus_or_bound_operator" + (match_code "plus, umin")) + +;; Used as an operator to get a handle on a already-known-valid MEM rtx:es +;; (no need to validate the address), where some address expression parts +;; have their own match_operand. + +(define_predicate "cris_mem_op" + (match_code "mem")) + +(define_predicate "cris_load_multiple_op" + (and (match_code "parallel") + (match_test "cris_movem_load_rest_p (op, 0)"))) + +(define_predicate "cris_store_multiple_op" + (and (match_code "parallel") + (match_test "cris_store_multiple_op_p (op)"))) + + +;; Operand helper predicates. + +(define_predicate "cris_bdap_const_operand" + (and (match_code "label_ref, symbol_ref, const_int, const_double, const") + (ior (not (match_test "flag_pic")) + (match_test "cris_valid_pic_const (op, true)")))) + +(define_predicate "cris_simple_address_operand" + (ior (match_operand:SI 0 "register_operand") + (and (match_code "post_inc") + (match_test "register_operand (XEXP (op, 0), Pmode)")))) + +(define_predicate "cris_simple_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "mem") + (match_test "cris_simple_address_operand (XEXP (op, 0), + Pmode)")))) + +(define_predicate "cris_nonsp_register_operand" + (and (match_operand 0 "register_operand") + (match_test "op != stack_pointer_rtx"))) + +;; The caller needs to use :SI. +(define_predicate "cris_bdap_sign_extend_operand" +; Disabled until +; or is committed. + (match_test "0")) +; (and (match_code "sign_extend") +; (and (match_test "MEM_P (XEXP (op, 0))") +; (match_test "cris_simple_address_operand (XEXP (XEXP (op, 0), 0), +; Pmode)")))) + +;; FIXME: Should not have to test for 1. +(define_predicate "cris_scale_int_operand" + (and (match_code "const_int") + (ior (ior (match_test "op == GEN_INT (4)") + (match_test "op == const2_rtx")) + (match_test "op == const1_rtx")))) + +;; FIXME: Should be able to assume (reg int). +(define_predicate "cris_biap_mult_operand" + (and (match_code "mult") + (ior (and (match_test "register_operand (XEXP (op, 0), Pmode)") + (match_test "cris_scale_int_operand (XEXP (op, 1), Pmode)")) + (and (match_test "cris_scale_int_operand (XEXP (op, 0), Pmode)") + (match_test "register_operand (XEXP (op, 1), Pmode)"))))) + + +;; Operand predicates. + +;; This checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP. +;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a [r] or [r+] in SImode, or sign-extend from HI or QI. + +(define_predicate "cris_bdap_operand" + (ior (match_operand 0 "cris_bdap_const_operand") + (ior (match_operand:SI 0 "cris_simple_operand") + (match_operand:SI 0 "cris_bdap_sign_extend_operand")))) + +;; This is similar to cris_bdap_operand: +;; It checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP or BIAP. +;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a mult of (1, 2 or 4) and a register +;; d) a [r] or [r+] in SImode, or sign-extend from HI or QI. 
*/ + +(define_predicate "cris_bdap_biap_operand" + (ior (match_operand 0 "cris_bdap_operand") + (match_operand 0 "cris_biap_mult_operand"))) + +;; Since with -fPIC, not all symbols are valid PIC symbols or indeed +;; general_operands, we have to have a predicate that matches it for the +;; "movsi" expander. +;; FIXME: Can s/special_// when PR 20413 is fixed. + +(define_special_predicate "cris_general_operand_or_symbol" + (ior (match_operand 0 "general_operand") + (and (match_code "const, symbol_ref, label_ref") + ; The following test is actually just an assertion. + (match_test "cris_pic_symbol_type_of (op) != cris_no_symbol")))) + +;; A predicate for the anon movsi expansion, one that fits a PCREL +;; operand as well as general_operand. + +(define_special_predicate "cris_general_operand_or_pic_source" + (ior (match_operand 0 "general_operand") + (and (match_test "flag_pic") + (match_test "cris_valid_pic_const (op, false)")))) + +;; Since a PLT symbol is not a general_operand, we have to have a +;; predicate that matches it when we need it. We use this in the expanded +;; "call" and "call_value" anonymous patterns. + +(define_predicate "cris_nonmemory_operand_or_callable_symbol" + (ior (match_operand 0 "nonmemory_operand") + (and (match_code "const") + (and + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (ior + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PLT_PCREL") + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PCREL")))))) + +;; This matches a (MEM (general_operand)) or +;; (MEM (cris_general_operand_or_symbol)). The second one isn't a valid +;; memory_operand, so we need this predicate to recognize call +;; destinations before we change them to a PLT operand (by wrapping in +;; UNSPEC CRIS_UNSPEC_PLT). + +(define_predicate "cris_mem_call_operand" + (and (match_code "mem") + (ior (match_operand 0 "memory_operand") + (match_test "cris_general_operand_or_symbol (XEXP (op, 0), + Pmode)")))) diff --git a/gcc-4.9/gcc/config/cris/sync.md b/gcc-4.9/gcc/config/cris/sync.md new file mode 100644 index 000000000..7f10aa43d --- /dev/null +++ b/gcc-4.9/gcc/config/cris/sync.md @@ -0,0 +1,314 @@ +;; GCC machine description for CRIS atomic memory sequences. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The CRIS atomic support yields code in three flavors, depending on +;; the CPU for which code is generated: +;; +;; - Plain old CRIS v0 (..v8) +;; - CRIS v10 (as used in ETRAX 100 LX) +;; - CRIS v32 (as used in ETRAX FS) +;; +;; The last two alternatives are similar, of LL/SC type. They may +;; fail for other reasons; an exception, a cache miss or a bus request +;; from other parts of the system. The difference between them is +;; just in what condition-codes are used to track LL and success or +;; failure for the store. 
See the chapter on integral read-write +;; operations, chapter 1.13 in "ETRAX 100LX Programmers Manual", +;; +;; and chapter 2.1 in "ETRAX FS Designer's reference", +;; . +;; Note that the datum being stored has to be contained fully within a +;; cache-line to be integral. A failure to store the data integrally +;; will be flagged, but the store may still have happened in part, +;; which translates most usefully into the data having to be +;; "naturally aligned" to work. Natural alignment is verified in the +;; generated code and will by default cause for unaligned pointers a +;; "break 8" to be executed or optionally a call to abort(). Beware +;; that options -m16bit and -m8bit may cause data to be unaligned +;; where it was otherwise aligned. Data has a better chance of being +;; aligned if it is declared with e.g. __attribute__ ((__align__ (4))). +;; +;; The "plain old v0..v8 flavor" just assumes there's a single CPU in +;; the system, that no other parts of the system have access to memory +;; used for atomic accesses and since there's no user mode without +;; access to interrupt flags (another assumption), it just turns off +;; interrupts while doing the access. Here, alignment is neither +;; required nor asserted. + +(define_c_enum "" + [ + CRIS_UNSPEC_ATOMIC_OP + CRIS_UNSPEC_ATOMIC_SWAP_MEM + CRIS_UNSPEC_ATOMIC_SWAP_BOOL + ]) + +(define_constants [(CRIS_CCR_INTERRUPT_BIT 5)]) + +;; We use "mult" as a placeholder for "nand" (which does not have a +;; separate binary rtx operation) so we can use an iterator in the +;; define_expand and define_insn and avoid having a separate +;; mostly-identical copy. You will see the "mult" operator in rtl +;; dumps, but it shouldn't matter as its use has one of its operands +;; inside an unspec_volatile. + +(define_code_iterator atomic_op [plus minus ior and xor mult]) + +(define_code_attr atomic_op_name + [(plus "add") (minus "sub") (and "and") (ior "or") (xor "xor") (mult "nand")]) + +;; The operator nonatomic-operand can be memory, constant or register +;; for all but xor. We can't use memory or addressing modes with +;; side-effects though, so just use registers and literal constants. +(define_code_attr atomic_op_op_cnstr + [(plus "ri") (minus "ri") (and "ri") (ior "ri") (xor "r") (mult "ri")]) + +(define_code_attr atomic_op_op_pred + [(plus "nonmemory_operand") (minus "nonmemory_operand") + (and "nonmemory_operand") (ior "nonmemory_operand") + (xor "register_operand") (mult "nonmemory_operand")]) + +;; Pairs of these are used to insert the "not" after the "and" for nand. +(define_code_attr atomic_op_mnem_pre_op2 ;; Upper-case only to simplify testing. + [(plus "%P2") (minus "Sub.d %2") (and "And%q2 %2") (ior "Or%q2 %2") (xor "Xor %2") + (mult "aNd%q2 %2")]) + +(define_code_attr atomic_op_mnem_post_op3 + [(plus "") (minus "") (and "") (ior "") (xor "") (mult "not %3\;")]) + +;; For SImode, emit "q" for operands -31..31. 
+(define_mode_attr qm3 [(SI "%q3") (HI ".w") (QI ".b")]) + +(define_expand "atomic_fetch_" + [(match_operand:BWD 0 "register_operand") + (match_operand:BWD 1 "memory_operand") + (match_operand:BWD 2 "") + (match_operand 3) + (atomic_op:BWD (match_dup 0) (match_dup 1))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + enum memmodel mmodel = (enum memmodel) INTVAL (operands[3]); + + if (mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC) + cris_emit_trap_for_misalignment (operands[1]); + + if (need_atomic_barrier_p (mmodel, true)) + expand_mem_thread_fence (mmodel); + + emit_insn (gen_cris_atomic_fetch__1 (operands[0], + operands[1], + operands[2])); + if (need_atomic_barrier_p (mmodel, false)) + expand_mem_thread_fence (mmodel); + + DONE; +}) + +(define_insn "cris_atomic_fetch__1" + [(set (match_operand:BWD 1 "memory_operand" "+Q") + (atomic_op:BWD + (unspec_volatile:BWD [(match_dup 1)] CRIS_UNSPEC_ATOMIC_OP) + ;; FIXME: improve constants more for plus, minus, and, ior. + ;; FIXME: handle memory operands without side-effects. + (match_operand:BWD 2 "" ""))) + (set (match_operand:BWD 0 "register_operand" "=&r") + (match_dup 1)) + (clobber (match_scratch:SI 3 "=&r"))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + /* Can't be too sure; better ICE if this happens. */ + gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1])); + + if (TARGET_V32) + return + "clearf p\n" + ".Lsync.%=:\;" + "move %1,%0\;" + "move.d %0,%3\;" + ",%3\;" + "ax\;" + "move %3,%1\;" + "bcs .Lsync.%=\;" + "clearf p"; + else if (cris_cpu_version == 10) + return + "clearf\n" + ".Lsync.%=:\;" + "move %1,%0\;" + "move.d %0,%3\;" + ",%3\;" + "ax\;" + "move %3,%1\;" + "bwf .Lsync.%=\;" + "clearf"; + else + { + /* This one is for CRIS versions without load-locked-store-conditional + machinery; assume single-core-non-shared-memory without user + mode/supervisor mode distinction, and just disable interrupts + while performing the operation. + Rather than making this pattern more complex by freeing another + register or stack position to save condition codes (the value + of the interrupt-enabled bit), we check whether interrupts were + enabled before we disabled them and branch to a version + with/without afterwards re-enabling them. */ + rtx ops[5]; + + /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */ + memcpy (ops, operands, sizeof(ops)); + ops[4] = GEN_INT (CRIS_CCR_INTERRUPT_BIT); + + output_asm_insn ("move $ccr,%3\;" + "di\;" + "move %1,%0\;" + "btstq %4,%3", + ops); + return + "bmi .Lsync.irqon.%=\;" + "move.d %0,%3\;" + + ",%3\;" + "ba .Lsync.irqoff.%=\;" + "move %3,%1\n" + + ".Lsync.irqon.%=:\;" + ",%3\;" + "move %3,%1\;" + "ei\n" + ".Lsync.irqoff.%=:"; + } +}) + +;; This pattern is more-or-less assumed to always exist if any of the +;; other atomic patterns exist (see e.g. comment at the +;; can_compare_and_swap_p call in omp-low.c, 4.8 era). We'd slightly +;; prefer atomic_exchange over this, but having both would be +;; redundant. +;; FIXME: handle memory without side-effects for operand[3]. 
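At the source level these patterns are reached through GCC's __atomic builtins. A minimal usage sketch follows (illustrative only; the names counter, bump and claim are invented), tying back to the alignment rules described at the top of this file:

   /* Keeping the object naturally aligned keeps it within one cache line,
      which the v10/v32 sequences require; with -mtrap-unaligned-atomic the
      emitted check would execute "break 8" on a misaligned address.  */
   static int counter __attribute__ ((aligned (4)));

   int
   bump (void)
   {
     /* Lowered through the atomic_fetch_add expander above.  */
     return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
   }

   int
   claim (int expected, int desired)
   {
     /* Lowered through the atomic_compare_and_swap expander below.  */
     return __atomic_compare_exchange_n (&counter, &expected, desired,
                                         0, __ATOMIC_SEQ_CST,
                                         __ATOMIC_SEQ_CST);
   }
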
+(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand") + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "memory_operand") + (match_operand:BWD 3 "nonmemory_operand") + (match_operand:BWD 4 "register_operand") + (match_operand 5) + (match_operand 6) + (match_operand 7)] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + enum memmodel mmodel = (enum memmodel) INTVAL (operands[6]); + + if (mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC) + cris_emit_trap_for_misalignment (operands[2]); + + if (need_atomic_barrier_p (mmodel, true)) + expand_mem_thread_fence (mmodel); + + emit_insn (gen_cris_atomic_compare_and_swap_1 (operands[0], + operands[1], + operands[2], + operands[3], + operands[4])); + if (need_atomic_barrier_p (mmodel, false)) + expand_mem_thread_fence (mmodel); + + DONE; +}) + +(define_insn "cris_atomic_compare_and_swap_1" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI + [(match_operand:BWD 2 "memory_operand" "+Q") + (match_operand:BWD 3 "nonmemory_operand" "ri")] + CRIS_UNSPEC_ATOMIC_SWAP_BOOL)) + (set (match_operand:BWD 1 "register_operand" "=&r") (match_dup 2)) + (set (match_dup 2) + (unspec_volatile:BWD + [(match_dup 2) + (match_dup 3) + (match_operand:BWD 4 "register_operand" "r")] + CRIS_UNSPEC_ATOMIC_SWAP_MEM))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + if (TARGET_V32) + return + "\n.Lsync.repeat.%=:\;" + "clearf p\;" + "move %2,%1\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "ax\;" + + "move %4,%2\;" + "bcs .Lsync.repeat.%=\n" + ".Lsync.after.%=:\;" + "seq %0"; + else if (cris_cpu_version == 10) + return + "\n.Lsync.repeat.%=:\;" + "clearf\;" + "move %2,%1\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "ax\;" + + "move %4,%2\;" + "bwf .Lsync.repeat.%=\n" + ".Lsync.after.%=:\;" + "seq %0"; + else + { + /* This one is for CRIS versions without load-locked-store-conditional + machinery; assume single-core-non-shared-memory without user + mode/supervisor mode distinction, and just disable interrupts + while performing the operation. + Rather than making this pattern more complex by freeing another + register or stack position to save condition codes (the value + of the interrupt-enabled bit), we check whether interrupts were + enabled before we disabled them and branch to a version + with/without afterwards re-enabling them. */ + rtx ops[4]; + + /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */ + memcpy (ops, operands, sizeof(ops)); + ops[3] = GEN_INT (CRIS_CCR_INTERRUPT_BIT); + + output_asm_insn ("move $ccr,%0\;" + "di\;" + "move %2,%1\;" + "btstq %3,%0", + ops); + return + "bmi .Lsync.irqon.%=\;" + "nop\;" + + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "seq %0\;" + "ba .Lsync.after.%=\;" + "move %4,%2\n" + + ".Lsync.irqon.%=:\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "seq %0\;" + "move %4,%2\;" + "ei\n" + ".Lsync.after.%=:"; + } +}) diff --git a/gcc-4.9/gcc/config/cris/t-cris b/gcc-4.9/gcc/config/cris/t-cris new file mode 100644 index 000000000..a58566525 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-cris @@ -0,0 +1,29 @@ +# +# t-cris +# +# The Makefile fragment to include when compiling gcc et al for CRIS. +# +# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . +# +# The makefile macros etc. are included in the order found in the +# section "Target Fragment" in the gcc info-files (or the paper copy) of +# "Using and Porting GCC" + +$(out_object_file): gt-cris.h +gt-cris.h : s-gtype ; @true diff --git a/gcc-4.9/gcc/config/cris/t-elfmulti b/gcc-4.9/gcc/config/cris/t-elfmulti new file mode 100644 index 000000000..1e9cf72d0 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-elfmulti @@ -0,0 +1,31 @@ +# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = march=v8/march=v10/march=v32 +MULTILIB_DIRNAMES = v8 v10 v32 +MULTILIB_MATCHES = \ + march?v8=mcpu?v8 \ + march?v10=mcpu?etrax100lx \ + march?v10=mcpu?ng \ + march?v10=march?etrax100lx \ + march?v10=march?ng \ + march?v10=march?v11 \ + march?v10=mcpu?v11 \ + march?v10=mcpu?v10 \ + march?v32=mcpu?v32 +MULTILIB_EXTRA_OPTS = mbest-lib-options diff --git a/gcc-4.9/gcc/config/cris/t-linux b/gcc-4.9/gcc/config/cris/t-linux new file mode 100644 index 000000000..71a964936 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-linux @@ -0,0 +1,5 @@ +# We *know* we have a limits.h in the glibc library, with extra +# definitions needed for e.g. libgfortran. +ifneq ($(inhibit_libc),true) +LIMITS_H_TEST = : +endif diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c new file mode 100644 index 000000000..892ba3547 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-c.c @@ -0,0 +1,775 @@ +/* Darwin support needed only by C/C++ frontends. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "cpplib.h" +#include "tree.h" +#include "target.h" +#include "incpath.h" +#include "c-family/c-common.h" +#include "c-family/c-pragma.h" +#include "c-family/c-format.h" +#include "diagnostic-core.h" +#include "flags.h" +#include "tm_p.h" +#include "cppdefault.h" +#include "prefix.h" +#include "c-family/c-target.h" +#include "c-family/c-target-def.h" +#include "cgraph.h" +#include "../../libcpp/internal.h" + +/* Pragmas. */ + +#define BAD(gmsgid) do { warning (OPT_Wpragmas, gmsgid); return; } while (0) +#define BAD2(msgid, arg) do { warning (OPT_Wpragmas, msgid, arg); return; } while (0) + +static bool using_frameworks = false; + +static const char *find_subframework_header (cpp_reader *pfile, const char *header, + cpp_dir **dirp); + +typedef struct align_stack +{ + int alignment; + struct align_stack * prev; +} align_stack; + +static struct align_stack * field_align_stack = NULL; + +/* Maintain a small stack of alignments. This is similar to pragma + pack's stack, but simpler. */ + +static void +push_field_alignment (int bit_alignment) +{ + align_stack *entry = XNEW (align_stack); + + entry->alignment = maximum_field_alignment; + entry->prev = field_align_stack; + field_align_stack = entry; + + maximum_field_alignment = bit_alignment; +} + +static void +pop_field_alignment (void) +{ + if (field_align_stack) + { + align_stack *entry = field_align_stack; + + maximum_field_alignment = entry->alignment; + field_align_stack = entry->prev; + free (entry); + } + else + error ("too many #pragma options align=reset"); +} + +/* Handlers for Darwin-specific pragmas. */ + +void +darwin_pragma_ignore (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + /* Do nothing. */ +} + +/* #pragma options align={mac68k|power|reset} */ + +void +darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t, x; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + arg = IDENTIFIER_POINTER (t); + if (strcmp (arg, "align")) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_EQ) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + warning (OPT_Wpragmas, "junk at end of '#pragma options'"); + + arg = IDENTIFIER_POINTER (t); + if (!strcmp (arg, "mac68k")) + push_field_alignment (16); + else if (!strcmp (arg, "power")) + push_field_alignment (0); + else if (!strcmp (arg, "reset")) + pop_field_alignment (); + else + BAD ("malformed '#pragma options align={mac68k|power|reset}', ignoring"); +} + +/* #pragma unused ([var {, var}*]) */ + +void +darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + tree decl, x; + int tok; + + if (pragma_lex (&x) != CPP_OPEN_PAREN) + BAD ("missing '(' after '#pragma unused', ignoring"); + + while (1) + { + tok = pragma_lex (&decl); + if (tok == CPP_NAME && decl) + { + tree local = lookup_name (decl); + if (local && (TREE_CODE (local) == PARM_DECL + || TREE_CODE (local) == VAR_DECL)) + { + TREE_USED (local) = 1; + DECL_READ_P (local) = 1; + } + tok = pragma_lex (&x); + if (tok != CPP_COMMA) + break; + } + } + + if (tok != CPP_CLOSE_PAREN) + BAD ("missing ')' after '#pragma unused', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + BAD ("junk at end of '#pragma unused'"); +} + +/* Parse the ms_struct pragma. 
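For reference, the forms accepted by the handlers above and by the ms_struct handler that follows look like this in user code (a hypothetical snippet; struct legacy and f are invented):

   #pragma options align=mac68k   /* push 16-bit maximum field alignment */
   struct legacy { char tag; long value; };
   #pragma options align=reset    /* pop back to the previous setting */

   #pragma ms_struct on           /* following records use ms_struct layout */

   void
   f (int unused_arg)
   {
   #pragma unused (unused_arg)    /* mark as used so -Wunused stays quiet */
   }
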
*/ +void +darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma ms_struct', ignoring"); + arg = IDENTIFIER_POINTER (t); + + if (!strcmp (arg, "on")) + darwin_ms_struct = true; + else if (!strcmp (arg, "off") || !strcmp (arg, "reset")) + darwin_ms_struct = false; + else + BAD ("malformed '#pragma ms_struct {on|off|reset}', ignoring"); + + if (pragma_lex (&t) != CPP_EOF) + BAD ("junk at end of '#pragma ms_struct'"); +} + +static struct frameworks_in_use { + size_t len; + const char *name; + cpp_dir* dir; +} *frameworks_in_use; +static int num_frameworks = 0; +static int max_frameworks = 0; + + +/* Remember which frameworks have been seen, so that we can ensure + that all uses of that framework come from the same framework. DIR + is the place where the named framework NAME, which is of length + LEN, was found. We copy the directory name from NAME, as it will be + freed by others. */ + +static void +add_framework (const char *name, size_t len, cpp_dir *dir) +{ + char *dir_name; + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return; + } + } + if (i >= max_frameworks) + { + max_frameworks = i*2; + max_frameworks += i == 0; + frameworks_in_use = XRESIZEVEC (struct frameworks_in_use, + frameworks_in_use, max_frameworks); + } + dir_name = XNEWVEC (char, len + 1); + memcpy (dir_name, name, len); + dir_name[len] = '\0'; + frameworks_in_use[num_frameworks].name = dir_name; + frameworks_in_use[num_frameworks].len = len; + frameworks_in_use[num_frameworks].dir = dir; + ++num_frameworks; +} + +/* Recall if we have seen the named framework NAME, before, and where + we saw it. NAME is LEN bytes long. The return value is the place + where it was seen before. */ + +static struct cpp_dir* +find_framework (const char *name, size_t len) +{ + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return frameworks_in_use[i].dir; + } + } + return 0; +} + +/* There are two directories in a framework that contain header files, + Headers and PrivateHeaders. We search Headers first as it is more + common to upgrade a header from PrivateHeaders to Headers and when + that is done, the old one might hang around and be out of data, + causing grief. */ + +struct framework_header {const char * dirName; int dirNameLen; }; +static struct framework_header framework_header_dirs[] = { + { "Headers", 7 }, + { "PrivateHeaders", 14 }, + { NULL, 0 } +}; + +/* Returns a pointer to a malloced string that contains the real pathname + to the file, given the base name and the name. */ + +static char * +framework_construct_pathname (const char *fname, cpp_dir *dir) +{ + const char *buf; + size_t fname_len, frname_len; + cpp_dir *fast_dir; + char *frname; + struct stat st; + int i; + + /* Framework names must have a / in them. */ + buf = strchr (fname, '/'); + if (buf) + fname_len = buf - fname; + else + return 0; + + fast_dir = find_framework (fname, fname_len); + + /* Framework includes must all come from one framework. 
*/ + if (fast_dir && dir != fast_dir) + return 0; + + frname = XNEWVEC (char, strlen (fname) + dir->len + 2 + + strlen(".framework/") + strlen("PrivateHeaders")); + strncpy (&frname[0], dir->name, dir->len); + frname_len = dir->len; + if (frname_len && frname[frname_len-1] != '/') + frname[frname_len++] = '/'; + strncpy (&frname[frname_len], fname, fname_len); + frname_len += fname_len; + strncpy (&frname[frname_len], ".framework/", strlen (".framework/")); + frname_len += strlen (".framework/"); + + if (fast_dir == 0) + { + frname[frname_len-1] = 0; + if (stat (frname, &st) == 0) + { + /* As soon as we find the first instance of the framework, + we stop and never use any later instance of that + framework. */ + add_framework (fname, fname_len, dir); + } + else + { + /* If we can't find the parent directory, no point looking + further. */ + free (frname); + return 0; + } + frname[frname_len-1] = '/'; + } + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&frname[frname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&frname[frname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (frname, &st) == 0) + return frname; + } + + free (frname); + return 0; +} + +/* Search for FNAME in sub-frameworks. pname is the context that we + wish to search in. Return the path the file was found at, + otherwise return 0. */ + +static const char* +find_subframework_file (const char *fname, const char *pname) +{ + char *sfrname; + const char *dot_framework = ".framework/"; + const char *bufptr; + int sfrname_len, i, fname_len; + struct cpp_dir *fast_dir; + static struct cpp_dir subframe_dir; + struct stat st; + + bufptr = strchr (fname, '/'); + + /* Subframework files must have / in the name. */ + if (bufptr == 0) + return 0; + + fname_len = bufptr - fname; + fast_dir = find_framework (fname, fname_len); + + /* Sub framework header filename includes parent framework name and + header name in the "CarbonCore/OSUtils.h" form. If it does not + include slash it is not a sub framework include. */ + bufptr = strstr (pname, dot_framework); + + /* If the parent header is not of any framework, then this header + cannot be part of any subframework. */ + if (!bufptr) + return 0; + + /* Now translate. 
For example, +- bufptr + fname = CarbonCore/OSUtils.h | + pname = /System/Library/Frameworks/Foundation.framework/Headers/Foundation.h + into + sfrname = /System/Library/Frameworks/Foundation.framework/Frameworks/CarbonCore.framework/Headers/OSUtils.h */ + + sfrname = XNEWVEC (char, strlen (pname) + strlen (fname) + 2 + + strlen ("Frameworks/") + strlen (".framework/") + + strlen ("PrivateHeaders")); + + bufptr += strlen (dot_framework); + + sfrname_len = bufptr - pname; + + strncpy (&sfrname[0], pname, sfrname_len); + + strncpy (&sfrname[sfrname_len], "Frameworks/", strlen ("Frameworks/")); + sfrname_len += strlen("Frameworks/"); + + strncpy (&sfrname[sfrname_len], fname, fname_len); + sfrname_len += fname_len; + + strncpy (&sfrname[sfrname_len], ".framework/", strlen (".framework/")); + sfrname_len += strlen (".framework/"); + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&sfrname[sfrname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&sfrname[sfrname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (sfrname, &st) == 0) + { + if (fast_dir != &subframe_dir) + { + if (fast_dir) + warning (0, "subframework include %s conflicts with framework include", + fname); + else + add_framework (fname, fname_len, &subframe_dir); + } + + return sfrname; + } + } + free (sfrname); + + return 0; +} + +/* Add PATH to the system includes. PATH must be malloc-ed and + NUL-terminated. System framework paths are C++ aware. */ + +static void +add_system_framework_path (char *path) +{ + int cxx_aware = 1; + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 1 + !cxx_aware; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, SYSTEM); +} + +/* Add PATH to the bracket includes. PATH must be malloc-ed and + NUL-terminated. */ + +void +add_framework_path (char *path) +{ + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 0; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, BRACKET); +} + +static const char *framework_defaults [] = + { + "/System/Library/Frameworks", + "/Library/Frameworks", + }; + +/* Register the GNU objective-C runtime include path if STDINC. */ + +void +darwin_register_objc_includes (const char *sysroot, const char *iprefix, + int stdinc) +{ + const char *fname; + size_t len; + /* We do not do anything if we do not want the standard includes. */ + if (!stdinc) + return; + + fname = GCC_INCLUDE_DIR "-gnu-runtime"; + + /* Register the GNU OBJC runtime include path if we are compiling OBJC + with GNU-runtime. */ + + if (c_dialect_objc () && !flag_next_runtime) + { + char *str; + /* See if our directory starts with the standard prefix. + "Translate" them, i.e. replace /usr/local/lib/gcc... with + IPREFIX and search them first. */ + if (iprefix && (len = cpp_GCC_INCLUDE_DIR_len) != 0 && !sysroot + && !strncmp (fname, cpp_GCC_INCLUDE_DIR, len)) + { + str = concat (iprefix, fname + len, NULL); + /* FIXME: wrap the headers for C++awareness. */ + add_path (str, SYSTEM, /*c++aware=*/false, false); + } + + /* Should this directory start with the sysroot? 
*/ + if (sysroot) + str = concat (sysroot, fname, NULL); + else + str = update_path (fname, ""); + + add_path (str, SYSTEM, /*c++aware=*/false, false); + } +} + + +/* Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + +void +darwin_register_frameworks (const char *sysroot, + const char *iprefix ATTRIBUTE_UNUSED, int stdinc) +{ + if (stdinc) + { + size_t i; + + /* Setup default search path for frameworks. */ + for (i=0; imissing_header = find_subframework_header; +} + +/* Search for HEADER in context dependent way. The return value is + the malloced name of a header to try and open, if any, or NULL + otherwise. This is called after normal header lookup processing + fails to find a header. We search each file in the include stack, + using FUNC, starting from the most deeply nested include and + finishing with the main input file. We stop searching when FUNC + returns nonzero. */ + +static const char* +find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) +{ + const char *fname = header; + struct cpp_buffer *b; + const char *n; + + for (b = cpp_get_buffer (pfile); + b && cpp_get_file (b) && cpp_get_path (cpp_get_file (b)); + b = cpp_get_prev (b)) + { + n = find_subframework_file (fname, cpp_get_path (cpp_get_file (b))); + if (n) + { + /* Logically, the place where we found the subframework is + the place where we found the Framework that contains the + subframework. This is useful for tracking wether or not + we are in a system header. */ + *dirp = cpp_get_dir (cpp_get_file (b)); + return n; + } + } + + return 0; +} + +/* Return the value of darwin_macosx_version_min suitable for the + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, + so '10.4.2' becomes 1040. The lowest digit is always zero. + Print a warning if the version number can't be understood. */ +static const char * +version_as_macro (void) +{ + static char result[] = "1000"; + + if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) + goto fail; + if (! ISDIGIT (darwin_macosx_version_min[3])) + goto fail; + result[2] = darwin_macosx_version_min[3]; + if (darwin_macosx_version_min[4] != '\0' + && darwin_macosx_version_min[4] != '.') + goto fail; + + return result; + + fail: + error ("unknown value %qs of -mmacosx-version-min", + darwin_macosx_version_min); + return "1000"; +} + +/* Define additional CPP flags for Darwin. */ + +#define builtin_define(TXT) cpp_define (pfile, TXT) + +void +darwin_cpp_builtins (cpp_reader *pfile) +{ + builtin_define ("__MACH__"); + builtin_define ("__APPLE__"); + + /* __APPLE_CC__ is defined as some old Apple include files expect it + to be defined and won't work if it isn't. */ + builtin_define_with_value ("__APPLE_CC__", "1", false); + + if (darwin_constant_cfstrings) + builtin_define ("__CONSTANT_CFSTRINGS__"); + + builtin_define_with_value ("__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__", + version_as_macro(), false); + + /* Since we do not (at 4.6) support ObjC gc for the NeXT runtime, the + following will cause a syntax error if one tries to compile gc attributed + items. However, without this, NeXT system headers cannot be parsed + properly (on systems >= darwin 9). 
*/ + if (flag_objc_gc) + { + builtin_define ("__strong=__attribute__((objc_gc(strong)))"); + builtin_define ("__weak=__attribute__((objc_gc(weak)))"); + builtin_define ("__OBJC_GC__"); + } + else + { + builtin_define ("__strong="); + builtin_define ("__weak="); + } + + if (CPP_OPTION (pfile, objc) && flag_objc_abi == 2) + builtin_define ("__OBJC2__"); +} + +/* Handle C family front-end options. */ + +static bool +handle_c_option (size_t code, + const char *arg, + int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + default: + /* Unrecognized options that we said we'd handle turn into + errors if not listed here. */ + return false; + + case OPT_iframework: + add_system_framework_path (xstrdup (arg)); + break; + + case OPT_fapple_kext: + ; + } + + /* We recognized the option. */ + return true; +} + +/* Allow ObjC* access to CFStrings. */ +static tree +darwin_objc_construct_string (tree str) +{ + if (!darwin_constant_cfstrings) + { + /* Even though we are not using CFStrings, place our literal + into the cfstring_htab hash table, so that the + darwin_constant_cfstring_p() function will see it. */ + darwin_enter_string_into_cfstring_table (str); + /* Fall back to NSConstantString. */ + return NULL_TREE; + } + + return darwin_build_constant_cfstring (str); +} + +/* The string ref type is created as CFStringRef by therefore, we + must match for it explicitly, since it's outside the gcc code. */ + +static bool +darwin_cfstring_ref_p (const_tree strp) +{ + tree tn; + if (!strp || TREE_CODE (strp) != POINTER_TYPE) + return false; + + tn = TYPE_NAME (strp); + if (tn) + tn = DECL_NAME (tn); + return (tn + && IDENTIFIER_POINTER (tn) + && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", 8)); +} + +/* At present the behavior of this is undefined and it does nothing. */ +static void +darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg), + tree ARG_UNUSED (args_list)) +{ +} + +/* The extra format types we recognize. */ +EXPORTED_CONST format_kind_info darwin_additional_format_types[] = { + { "CFString", NULL, NULL, NULL, NULL, + NULL, NULL, + FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0, + NULL, NULL + } +}; + + +/* Support routines to dump the class references for NeXT ABI v1, aka + 32-bits ObjC-2.0, as top-level asms. + The following two functions should only be called from + objc/objc-next-runtime-abi-01.c. */ + +static void +darwin_objc_declare_unresolved_class_reference (const char *name) +{ + const char *lazy_reference = ".lazy_reference\t"; + const char *hard_reference = ".reference\t"; + const char *reference = MACHOPIC_INDIRECT ? lazy_reference : hard_reference; + size_t len = strlen (reference) + strlen(name) + 2; + char *buf = (char *) alloca (len); + + gcc_checking_assert (!strncmp (name, ".objc_class_name_", 17)); + + snprintf (buf, len, "%s%s", reference, name); + add_asm_node (build_string (strlen (buf), buf)); +} + +static void +darwin_objc_declare_class_definition (const char *name) +{ + const char *xname = targetm.strip_name_encoding (name); + size_t len = strlen (xname) + 7 + 5; + char *buf = (char *) alloca (len); + + gcc_checking_assert (!strncmp (name, ".objc_class_name_", 17) + || !strncmp (name, "*.objc_category_name_", 21)); + + /* Mimic default_globalize_label. 
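For a hypothetical class Foo, these two hooks amount to emitting ".lazy_reference .objc_class_name_Foo" (or ".reference ..." when MACHOPIC_INDIRECT is false) for an unresolved reference, and ".globl .objc_class_name_Foo" followed by ".objc_class_name_Foo = 0" for a definition; the NeXT v1 ABI resolves class references against these link-time symbols.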
*/ + snprintf (buf, len, ".globl\t%s", xname); + add_asm_node (build_string (strlen (buf), buf)); + + snprintf (buf, len, "%s = 0", xname); + add_asm_node (build_string (strlen (buf), buf)); +} + +#undef TARGET_HANDLE_C_OPTION +#define TARGET_HANDLE_C_OPTION handle_c_option + +#undef TARGET_OBJC_CONSTRUCT_STRING_OBJECT +#define TARGET_OBJC_CONSTRUCT_STRING_OBJECT darwin_objc_construct_string + +#undef TARGET_OBJC_DECLARE_UNRESOLVED_CLASS_REFERENCE +#define TARGET_OBJC_DECLARE_UNRESOLVED_CLASS_REFERENCE \ + darwin_objc_declare_unresolved_class_reference + +#undef TARGET_OBJC_DECLARE_CLASS_DEFINITION +#define TARGET_OBJC_DECLARE_CLASS_DEFINITION \ + darwin_objc_declare_class_definition + +#undef TARGET_STRING_OBJECT_REF_TYPE_P +#define TARGET_STRING_OBJECT_REF_TYPE_P darwin_cfstring_ref_p + +#undef TARGET_CHECK_STRING_OBJECT_FORMAT_ARG +#define TARGET_CHECK_STRING_OBJECT_FORMAT_ARG darwin_check_cfstring_format_arg + +struct gcc_targetcm targetcm = TARGETCM_INITIALIZER; diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c new file mode 100644 index 000000000..8b6ae9391 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-driver.c @@ -0,0 +1,224 @@ +/* Additional functions for the GCC driver on Darwin native. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "gcc.h" +#include "opts.h" + +#ifndef CROSS_DIRECTORY_STRUCTURE +#include +#include "xregex.h" + +static bool +darwin_find_version_from_kernel (char *new_flag) +{ + char osversion[32]; + size_t osversion_len = sizeof (osversion) - 1; + static int osversion_name[2] = { CTL_KERN, KERN_OSRELEASE }; + int major_vers; + char minor_vers[6]; + char * version_p; + char * version_pend; + + /* Determine the version of the running OS. If we can't, warn user, + and do nothing. */ + if (sysctl (osversion_name, ARRAY_SIZE (osversion_name), osversion, + &osversion_len, NULL, 0) == -1) + { + warning (0, "sysctl for kern.osversion failed: %m"); + return false; + } + + /* Try to parse the first two parts of the OS version number. Warn + user and return if it doesn't make sense. */ + if (! ISDIGIT (osversion[0])) + goto parse_failed; + major_vers = osversion[0] - '0'; + version_p = osversion + 1; + if (ISDIGIT (*version_p)) + major_vers = major_vers * 10 + (*version_p++ - '0'); + if (major_vers > 4 + 9) + goto parse_failed; + if (*version_p++ != '.') + goto parse_failed; + version_pend = strchr(version_p, '.'); + if (!version_pend) + goto parse_failed; + if (! ISDIGIT (*version_p)) + goto parse_failed; + strncpy(minor_vers, version_p, version_pend - version_p); + minor_vers[version_pend - version_p] = '\0'; + + /* The major kernel version number is 4 plus the second OS version + component. 
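Concretely (example kern.osrelease values, not taken from the patch): Darwin 8.11.0 parses to major_vers 8, which falls at or below the old-linker cutoff just below, so the default becomes -mmacosx-version-min=10.4 with no third component; Darwin 9.8.0 parses to major_vers 9 and minor_vers "8", giving 10.5.8.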
*/ + if (major_vers - 4 <= 4) + /* On 10.4 and earlier, the old linker is used which does not + support three-component system versions. */ + sprintf (new_flag, "10.%d", major_vers - 4); + else + sprintf (new_flag, "10.%d.%s", major_vers - 4, + minor_vers); + + return true; + + parse_failed: + warning (0, "couldn%'t understand kern.osversion %q.*s", + (int) osversion_len, osversion); + return false; +} + +#endif + +/* When running on a Darwin system and using that system's headers and + libraries, default the -mmacosx-version-min flag to be the version + of the system on which the compiler is running. + + When building cross or native cross compilers, default to the OSX + version of the target (as provided by the most specific target header + included in tm.h). This may be overidden by setting the flag explicitly + (or by the MACOSX_DEPLOYMENT_TARGET environment). */ + +static void +darwin_default_min_version (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + const unsigned int argc = *decoded_options_count; + struct cl_decoded_option *const argv = *decoded_options; + unsigned int i; + static char new_flag[sizeof ("10.0.0") + 6]; + + /* If the command-line is empty, just return. */ + if (argc <= 1) + return; + + /* Don't do this if the user specified -mmacosx-version-min= or + -mno-macosx-version-min. */ + for (i = 1; i < argc; i++) + if (argv[i].opt_index == OPT_mmacosx_version_min_) + return; + + /* Retrieve the deployment target from the environment and insert + it as a flag. */ + { + const char * macosx_deployment_target; + macosx_deployment_target = getenv ("MACOSX_DEPLOYMENT_TARGET"); + if (macosx_deployment_target + /* Apparently, an empty string for MACOSX_DEPLOYMENT_TARGET means + "use the default". Or, possibly "use 10.1". We choose + to ignore the environment variable, as if it was never set. */ + && macosx_deployment_target[0]) + { + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, macosx_deployment_target, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + } + } + +#ifndef CROSS_DIRECTORY_STRUCTURE + + /* Try to find the version from the kernel, if we fail - we print a message + and give up. */ + if (!darwin_find_version_from_kernel (new_flag)) + return; + +#else + + /* For cross-compilers, default to the target OS version. */ + + strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag)); + +#endif /* CROSS_DIRECTORY_STRUCTURE */ + + /* Add the new flag. */ + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, new_flag, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + +} + +/* Translate -filelist and -framework options in *DECODED_OPTIONS + (size *DECODED_OPTIONS_COUNT) to use -Xlinker so that they are + considered to be linker inputs in the case that no other inputs are + specified. Handling these options in DRIVER_SELF_SPECS does not + suffice because specs are too late to add linker inputs, and + handling them in LINK_SPEC does not suffice because the linker will + not be called if there are no other inputs. When native, also + default the -mmacosx-version-min flag. 
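As a concrete illustration of the rewriting described here (hypothetical command line): on a native x86 system, "gcc -arch x86_64 -framework Foundation main.c" has -arch x86_64 turned into -m64, and -framework Foundation re-spelled as -Xlinker -framework -Xlinker Foundation, so the framework still reaches the linker even when nothing else on the command line counts as a linker input.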
*/ + +void +darwin_driver_init (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + unsigned int i; + + for (i = 1; i < *decoded_options_count; i++) + { + if ((*decoded_options)[i].errors & CL_ERR_MISSING_ARG) + continue; + switch ((*decoded_options)[i].opt_index) + { +#if DARWIN_X86 + case OPT_arch: + if (!strcmp ((*decoded_options)[i].arg, "i386")) + generate_option (OPT_m32, NULL, 1, CL_DRIVER, &(*decoded_options)[i]); + else if (!strcmp ((*decoded_options)[i].arg, "x86_64")) + generate_option (OPT_m64, NULL, 1, CL_DRIVER, &(*decoded_options)[i]); + break; +#endif + + case OPT_filelist: + case OPT_framework: + ++*decoded_options_count; + *decoded_options = XRESIZEVEC (struct cl_decoded_option, + *decoded_options, + *decoded_options_count); + memmove (*decoded_options + i + 2, + *decoded_options + i + 1, + ((*decoded_options_count - i - 2) + * sizeof (struct cl_decoded_option))); + generate_option (OPT_Xlinker, (*decoded_options)[i].arg, 1, + CL_DRIVER, &(*decoded_options)[i + 1]); + generate_option (OPT_Xlinker, + (*decoded_options)[i].canonical_option[0], 1, + CL_DRIVER, &(*decoded_options)[i]); + break; + + default: + break; + } + } + + darwin_default_min_version (decoded_options_count, decoded_options); +} diff --git a/gcc-4.9/gcc/config/darwin-f.c b/gcc-4.9/gcc/config/darwin-f.c new file mode 100644 index 000000000..736df5b08 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-f.c @@ -0,0 +1,60 @@ +/* Darwin support needed only by Fortran frontends. + Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributed by Daniel Franke. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* Provide stubs for the hooks defined by darwin.h + TARGET_EXTRA_PRE_INCLUDES, TARGET_EXTRA_INCLUDES + + As both, gcc and gfortran link in incpath.o, we can not + conditionally undefine said hooks if fortran is build. + However, we can define do-nothing stubs of said hooks as + we are not interested in objc include files in Fortran. + + The hooks original purpose (see also darwin-c.c): + * darwin_register_objc_includes + Register the GNU objective-C runtime include path if STDINC. + + * darwin_register_frameworks + Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + + +#include "ansidecl.h" + +/* Prototypes for functions below to avoid a lengthy list of includes + to achieve the same. 
*/ +void darwin_register_objc_includes (const char *, const char *, int); +void darwin_register_frameworks (const char *, const char *, int); + + +void +darwin_register_objc_includes (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} + +void +darwin_register_frameworks (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} diff --git a/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def b/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def new file mode 100644 index 000000000..dc55bb674 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def @@ -0,0 +1,113 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +PATCH_BUILTIN (BUILT_IN_ACOSHL) +PATCH_BUILTIN (BUILT_IN_ACOSL) +PATCH_BUILTIN (BUILT_IN_ASINHL) +PATCH_BUILTIN (BUILT_IN_ASINL) +PATCH_BUILTIN (BUILT_IN_ATAN2L) +PATCH_BUILTIN (BUILT_IN_ATANHL) +PATCH_BUILTIN (BUILT_IN_ATANL) +PATCH_BUILTIN (BUILT_IN_CABSL) +PATCH_BUILTIN (BUILT_IN_CACOSHL) +PATCH_BUILTIN (BUILT_IN_CACOSL) +PATCH_BUILTIN (BUILT_IN_CARGL) +PATCH_BUILTIN (BUILT_IN_CASINHL) +PATCH_BUILTIN (BUILT_IN_CASINL) +PATCH_BUILTIN (BUILT_IN_CATANHL) +PATCH_BUILTIN (BUILT_IN_CATANL) +PATCH_BUILTIN (BUILT_IN_CBRTL) +PATCH_BUILTIN (BUILT_IN_CCOSHL) +PATCH_BUILTIN (BUILT_IN_CCOSL) +PATCH_BUILTIN (BUILT_IN_CEILL) +PATCH_BUILTIN (BUILT_IN_CEXPL) +PATCH_BUILTIN (BUILT_IN_CIMAGL) +PATCH_BUILTIN (BUILT_IN_CLOGL) +PATCH_BUILTIN (BUILT_IN_CONJL) +PATCH_BUILTIN (BUILT_IN_COPYSIGNL) +PATCH_BUILTIN (BUILT_IN_COSHL) +PATCH_BUILTIN (BUILT_IN_COSL) +PATCH_BUILTIN (BUILT_IN_CPOWL) +PATCH_BUILTIN (BUILT_IN_CPROJL) +PATCH_BUILTIN (BUILT_IN_CREALL) +PATCH_BUILTIN (BUILT_IN_CSINHL) +PATCH_BUILTIN (BUILT_IN_CSINL) +PATCH_BUILTIN (BUILT_IN_CSQRTL) +PATCH_BUILTIN (BUILT_IN_CTANHL) +PATCH_BUILTIN (BUILT_IN_CTANL) +PATCH_BUILTIN (BUILT_IN_ERFCL) +PATCH_BUILTIN (BUILT_IN_ERFL) +PATCH_BUILTIN (BUILT_IN_EXP2L) +PATCH_BUILTIN (BUILT_IN_EXPL) +PATCH_BUILTIN (BUILT_IN_EXPM1L) +PATCH_BUILTIN (BUILT_IN_FABSL) +PATCH_BUILTIN (BUILT_IN_FDIML) +PATCH_BUILTIN (BUILT_IN_FLOORL) +PATCH_BUILTIN (BUILT_IN_FMAL) +PATCH_BUILTIN (BUILT_IN_FMAXL) +PATCH_BUILTIN (BUILT_IN_FMINL) +PATCH_BUILTIN (BUILT_IN_FMODL) +PATCH_BUILTIN (BUILT_IN_FREXPL) +PATCH_BUILTIN (BUILT_IN_HYPOTL) +PATCH_BUILTIN (BUILT_IN_ILOGBL) +PATCH_BUILTIN (BUILT_IN_LDEXPL) +PATCH_BUILTIN (BUILT_IN_LGAMMAL) +PATCH_BUILTIN (BUILT_IN_LLRINTL) +PATCH_BUILTIN (BUILT_IN_LLROUNDL) +PATCH_BUILTIN (BUILT_IN_LOG10L) +PATCH_BUILTIN (BUILT_IN_LOG1PL) +PATCH_BUILTIN (BUILT_IN_LOG2L) +PATCH_BUILTIN (BUILT_IN_LOGBL) +PATCH_BUILTIN (BUILT_IN_LOGL) +PATCH_BUILTIN (BUILT_IN_LRINTL) +PATCH_BUILTIN (BUILT_IN_LROUNDL) +PATCH_BUILTIN (BUILT_IN_MODFL) +PATCH_BUILTIN (BUILT_IN_NANL) +PATCH_BUILTIN (BUILT_IN_NEARBYINTL) +PATCH_BUILTIN (BUILT_IN_NEXTAFTERL) +PATCH_BUILTIN (BUILT_IN_NEXTTOWARDL) +PATCH_BUILTIN (BUILT_IN_POWL) +PATCH_BUILTIN 
(BUILT_IN_REMAINDERL) +PATCH_BUILTIN (BUILT_IN_REMQUOL) +PATCH_BUILTIN (BUILT_IN_RINTL) +PATCH_BUILTIN (BUILT_IN_ROUNDL) +PATCH_BUILTIN (BUILT_IN_SCALBLNL) +PATCH_BUILTIN (BUILT_IN_SCALBNL) +PATCH_BUILTIN (BUILT_IN_SINHL) +PATCH_BUILTIN (BUILT_IN_SINL) +PATCH_BUILTIN (BUILT_IN_SQRTL) +PATCH_BUILTIN (BUILT_IN_TANHL) +PATCH_BUILTIN (BUILT_IN_TANL) +PATCH_BUILTIN (BUILT_IN_TGAMMAL) +PATCH_BUILTIN (BUILT_IN_TRUNCL) + +PATCH_BUILTIN_NO64 (BUILT_IN_VFPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VFSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSNPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSSCANF) + +PATCH_BUILTIN_VARIADIC (BUILT_IN_FPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_FSCANF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_PRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SCANF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SNPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SSCANF) diff --git a/gcc-4.9/gcc/config/darwin-protos.h b/gcc-4.9/gcc/config/darwin-protos.h new file mode 100644 index 000000000..20974c19e --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-protos.h @@ -0,0 +1,127 @@ +/* Prototypes. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +extern void darwin_init_sections (void); +extern int name_needs_quotes (const char *); + +extern void machopic_validate_stub_or_non_lazy_ptr (const char *); + +extern void machopic_output_function_base_name (FILE *); +extern const char *machopic_indirection_name (rtx, bool); +extern const char *machopic_mcount_stub_name (void); +extern bool machopic_should_output_picbase_label (void); +extern const char *machopic_get_function_picbase (void); + +#ifdef RTX_CODE + +extern rtx machopic_gen_offset (rtx); +extern int machopic_operand_p (rtx); +extern int machopic_symbol_defined_p (rtx sym_ref); +extern enum machopic_addr_class machopic_classify_symbol (rtx); + +extern rtx machopic_indirect_data_reference (rtx, rtx); +extern rtx machopic_indirect_call_target (rtx); +extern rtx machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); + +extern void machopic_asm_out_constructor (rtx, int); +extern void machopic_asm_out_destructor (rtx, int); +extern section *machopic_select_rtx_section (enum machine_mode, rtx, + unsigned HOST_WIDE_INT); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE + +extern void machopic_define_symbol (rtx); +extern void darwin_encode_section_info (tree, rtx, int); +extern void darwin_set_default_type_attributes (tree); + +#endif /* TREE_CODE */ + +extern void machopic_finish (FILE *); + +extern int machopic_reloc_rw_mask (void); +extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT); + +extern section *darwin_function_section (tree, enum node_frequency, bool, bool); +extern section *darwin_tm_clone_table_section (void); +extern void darwin_function_switched_text_sections (FILE *, tree, bool); + +extern void darwin_unique_section (tree decl, int reloc); +extern void darwin_asm_named_section (const char *, unsigned int, tree); +extern void darwin_non_lazy_pcrel (FILE *, rtx); + +extern void darwin_emit_unwind_label (FILE *, tree, int, int); +extern void darwin_emit_except_table_label (FILE *); + +extern void darwin_pragma_ignore (struct cpp_reader *); +extern void darwin_pragma_options (struct cpp_reader *); +extern void darwin_pragma_unused (struct cpp_reader *); +extern void darwin_pragma_ms_struct (struct cpp_reader *); + +extern void darwin_file_start (void); +extern void darwin_file_end (void); + +extern void darwin_asm_lto_start (void); +extern void darwin_asm_lto_end (void); + +extern void darwin_mark_decl_preserved (const char *); + +extern tree darwin_handle_kext_attribute (tree *, tree, tree, int, bool *); +extern tree darwin_handle_weak_import_attribute (tree *node, tree name, + tree args, int flags, + bool * no_add_attrs); +extern void machopic_output_stub (FILE *, const char *, const char *); +extern void darwin_globalize_label (FILE *, const char *); +extern void darwin_assemble_visibility (tree, int); + +extern void darwin_asm_output_dwarf_delta (FILE *, int, const char *, + const char *); +extern void darwin_asm_output_dwarf_offset (FILE *, int, const char *, + section *); + +extern void darwin_asm_declare_object_name (FILE *, const char *, tree); +extern void darwin_asm_declare_constant_name (FILE *, const char *, + const_tree, HOST_WIDE_INT); + +extern void darwin_output_aligned_bss (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, unsigned int); + +extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void darwin_asm_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); + +extern bool darwin_binds_local_p 
(const_tree); +extern void darwin_cpp_builtins (struct cpp_reader *); + +extern tree darwin_init_cfstring_builtins (unsigned); +extern tree darwin_fold_builtin (tree, int, tree *, bool); +extern bool darwin_cfstring_p (tree); +extern tree darwin_build_constant_cfstring (tree); +extern void darwin_enter_string_into_cfstring_table (tree); + +extern void darwin_asm_output_anchor (rtx symbol); +extern bool darwin_use_anchors_for_symbol_p (const_rtx symbol); +extern bool darwin_kextabi_p (void); +extern void darwin_override_options (void); +extern void darwin_patch_builtins (void); +extern void darwin_rename_builtins (void); +extern bool darwin_libc_has_function (enum function_class fn_class); diff --git a/gcc-4.9/gcc/config/darwin-sections.def b/gcc-4.9/gcc/config/darwin-sections.def new file mode 100644 index 000000000..23474e125 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-sections.def @@ -0,0 +1,195 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Since Darwin's ld will not allow zero-sized objects, and gcc wants them, + we emit one byte (in darwin.c) when such an object is encountered. + + This messes up section anchoring because the emitted byte is not counted + outside the port. To cope with this, we set aside sections for zero-sized + objects and disallow those sections from participating in section anchors + ("zobj_" sections, below). + + Items that might be coalesced by the linker are prevented from participating, + (and those in mergeable sections are disallowed in varasm.c). */ + +/* The .text section is generated in varasm.c */ +DEF_SECTION (text_coal_section, SECTION_CODE|SECTION_NO_ANCHOR, + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", 0) + +DEF_SECTION (text_hot_section, SECTION_CODE, + ".section __TEXT,__text_hot,regular,pure_instructions", 0) +DEF_SECTION (text_cold_section, SECTION_CODE, + ".section __TEXT,__text_cold,regular,pure_instructions", 0) +DEF_SECTION (text_startup_section, SECTION_CODE, + ".section __TEXT,__text_startup,regular,pure_instructions", 0) +DEF_SECTION (text_exit_section, SECTION_CODE, + ".section __TEXT,__text_exit,regular,pure_instructions", 0) + +DEF_SECTION (text_hot_coal_section, SECTION_CODE, + ".section __TEXT,__text_hot_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_cold_coal_section, SECTION_CODE, + ".section __TEXT,__text_cold_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_startup_coal_section, SECTION_CODE, + ".section __TEXT,__text_stt_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_exit_coal_section, SECTION_CODE, + ".section __TEXT,__text_exit_coal,coalesced,pure_instructions", 0) + +/* const */ +DEF_SECTION (const_section, 0, ".const", 0) +DEF_SECTION (const_coal_section, SECTION_NO_ANCHOR, + ".section __TEXT,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. 
*/ +DEF_SECTION (zobj_const_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_const", 0) + +/* Write-able data. '.data' handled in varasm.c */ +DEF_SECTION (static_data_section, SECTION_WRITE, ".static_data", 0) +DEF_SECTION (data_coal_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section __DATA,__datacoal_nt,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_data_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_data", 0) + +/* BSS - .lcomm / .zerofill __DATA,__bss sections cannot be switched to + explicitly (will create an assembler error). */ +DEF_SECTION (zobj_bss_section, SECTION_WRITE|SECTION_BSS|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_bss", 0) + +/* const data */ +DEF_SECTION (const_data_section, 0, ".const_data", 0) +DEF_SECTION (const_data_coal_section, SECTION_NO_ANCHOR, + ".section __DATA,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_const_data_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_cnst_data", 0) + +/* Strings and other literals. */ +DEF_SECTION (cstring_section, SECTION_MERGE | SECTION_STRINGS, ".cstring", 0) +DEF_SECTION (literal4_section, SECTION_MERGE, ".literal4", 0) +DEF_SECTION (literal8_section, SECTION_MERGE, ".literal8", 0) +DEF_SECTION (literal16_section, SECTION_MERGE, ".literal16", 0) +/* Unlike constant NSStrings, constant CFStrings do not live in the + __OBJC segment since they may also occur in pure C or C++ programs. */ +DEF_SECTION (cfstring_constant_object_section, 0, + ".section __DATA, __cfstring", 0) + +/* Module init, term, constructors & destructors. */ +DEF_SECTION (mod_init_section, 0, ".mod_init_func", 0) +DEF_SECTION (mod_term_section, 0, ".mod_term_func", 0) +DEF_SECTION (constructor_section, 0, ".constructor", 0) +DEF_SECTION (destructor_section, 0, ".destructor", 0) + +/* Objective-C ABI=0 (Original version) sections. */ +DEF_SECTION (objc_class_section, 0, ".objc_class", 1) +DEF_SECTION (objc_meta_class_section, 0, ".objc_meta_class", 1) +DEF_SECTION (objc_category_section, 0, ".objc_category", 1) +DEF_SECTION (objc_class_vars_section, 0, ".objc_class_vars", 1) +DEF_SECTION (objc_instance_vars_section, 0, ".objc_instance_vars", 1) +DEF_SECTION (objc_cls_meth_section, 0, ".objc_cls_meth", 1) +DEF_SECTION (objc_inst_meth_section, 0, ".objc_inst_meth", 1) +DEF_SECTION (objc_cat_cls_meth_section, 0, ".objc_cat_cls_meth", 1) +DEF_SECTION (objc_cat_inst_meth_section, 0, ".objc_cat_inst_meth", 1) +DEF_SECTION (objc_selector_refs_section, SECTION_MERGE, ".objc_message_refs", 1) +DEF_SECTION (objc_selector_fixup_section, 0, + ".section __OBJC, __sel_fixup, regular, no_dead_strip", 1) +DEF_SECTION (objc_symbols_section, 0, ".objc_symbols", 1) +DEF_SECTION (objc_module_info_section, 0, ".objc_module_info", 1) +DEF_SECTION (objc_protocol_section, 0, ".objc_protocol", 1) +DEF_SECTION (objc_string_object_section, 0, ".objc_string_object", 1) +DEF_SECTION (objc_constant_string_object_section, 0, + ".section __OBJC, __cstring_object, regular, no_dead_strip", 0) + +/* Fix-and-Continue image marker. 
*/ +DEF_SECTION (objc_image_info_section, 0, + ".section __OBJC, __image_info, regular, no_dead_strip", 1) +DEF_SECTION (objc_class_names_section, 0, ".objc_class_names", 1) +DEF_SECTION (objc_meth_var_names_section, 0, ".objc_meth_var_names", 1) +DEF_SECTION (objc_meth_var_types_section, 0, ".objc_meth_var_types", 1) +DEF_SECTION (objc_cls_refs_section, SECTION_MERGE, ".objc_cls_refs", 1) + +/* Stubs and symbol indirection sections. */ +/* lazy symbol pointers. */ +DEF_SECTION (machopic_lazy_symbol_ptr_section, SECTION_NO_ANCHOR, + ".lazy_symbol_pointer", 0) +DEF_SECTION (machopic_lazy_symbol_ptr2_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr2,lazy_symbol_pointers", 0) +DEF_SECTION (machopic_lazy_symbol_ptr3_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr3,lazy_symbol_pointers", 0) +/* non-lazy symbol pointers. */ +DEF_SECTION (machopic_nl_symbol_ptr_section, SECTION_NO_ANCHOR, + MACHOPIC_NL_SYMBOL_PTR_SECTION, 0) +/* Symbol stubs. */ +DEF_SECTION (machopic_symbol_stub_section, SECTION_NO_ANCHOR, + ".symbol_stub", 0) +DEF_SECTION (machopic_symbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16", 0) +/* PIC symbol stubs. */ +DEF_SECTION (machopic_picsymbol_stub_section, SECTION_NO_ANCHOR, + ".picsymbol_stub", 0) +DEF_SECTION (machopic_picsymbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32", 0) +DEF_SECTION (machopic_picsymbol_stub2_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub2,symbol_stubs,pure_instructions,25", 0) +DEF_SECTION (machopic_picsymbol_stub3_section, SECTION_NO_ANCHOR, + ".section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5", 0) + +/* Exception-related. */ +DEF_SECTION (darwin_exception_section, SECTION_NO_ANCHOR, + ".section __DATA,__gcc_except_tab", 0) +DEF_SECTION (darwin_eh_frame_section, SECTION_NO_ANCHOR, + ".section " EH_FRAME_SECTION_NAME ",__eh_frame" + EH_FRAME_SECTION_ATTR, 0) + +/* Sections for ObjC ABI=1 (ObjC 'V1' extensions) */ +DEF_SECTION (objc1_class_ext_section, 0, + ".section __OBJC, __class_ext, regular, no_dead_strip", 1) +DEF_SECTION (objc1_prop_list_section, 0, + ".section __OBJC, __property, regular, no_dead_strip", 1) +DEF_SECTION (objc1_protocol_ext_section, 0, + ".section __OBJC, __protocol_ext, regular, no_dead_strip", 1) + +/* Sections for ObjC ABI=2 (m64). 
*/ +DEF_SECTION (objc2_message_refs_section, 0, + ".section __DATA, __objc_msgrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classdefs_section, 0, ".section __DATA, __objc_data", 1) +DEF_SECTION (objc2_metadata_section, 0, ".section __DATA, __objc_const", 1) + +DEF_SECTION (objc2_classrefs_section, 0, + ".section __DATA, __objc_classrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classlist_section, 0, + ".section __DATA, __objc_classlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_categorylist_section, 0, + ".section __DATA, __objc_catlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_selector_refs_section, 0, + ".section __DATA, __objc_selrefs, literal_pointers, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_class_section, 0, + ".section __DATA, __objc_nlclslist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_category_section, 0, + ".section __DATA, __objc_nlcatlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocollist_section, 0, + ".section __DATA, __objc_protolist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocolrefs_section, 0, + ".section __DATA, __objc_protorefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_super_classrefs_section, 0, + ".section __DATA, __objc_superrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_image_info_section, 0, + ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1) +DEF_SECTION (objc2_constant_string_object_section, 0, + ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1) diff --git a/gcc-4.9/gcc/config/darwin.c b/gcc-4.9/gcc/config/darwin.c new file mode 100644 index 000000000..adf370d4d --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.c @@ -0,0 +1,3663 @@ +/* Functions for generic Darwin as target machine for GNU C compiler. + Copyright (C) 1989-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "stringpool.h" +#include "varasm.h" +#include "stor-layout.h" +#include "expr.h" +#include "reload.h" +#include "function.h" +#include "ggc.h" +#include "langhooks.h" +#include "target.h" +#include "tm_p.h" +#include "diagnostic-core.h" +#include "toplev.h" +#include "hashtab.h" +#include "df.h" +#include "debug.h" +#include "obstack.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "lto-streamer.h" + +/* Darwin supports a feature called fix-and-continue, which is used + for rapid turn around debugging. 
When code is compiled with the + -mfix-and-continue flag, two changes are made to the generated code + that allow the system to do things that it would normally not be + able to do easily. These changes allow gdb to load in + recompilation of a translation unit that has been changed into a + running program and replace existing functions and methods of that + translation unit with versions of those functions and methods + from the newly compiled translation unit. The new functions access + the existing static symbols from the old translation unit, if the + symbol existed in the unit to be replaced, and from the new + translation unit, otherwise. + + The changes are to insert 5 nops at the beginning of all functions + and to use indirection to get at static symbols. The 5 nops + are required by consumers of the generated code. Currently, gdb + uses this to patch in a jump to the overriding function, this + allows all uses of the old name to forward to the replacement, + including existing function pointers and virtual methods. See + rs6000_emit_prologue for the code that handles the nop insertions. + + The added indirection allows gdb to redirect accesses to static + symbols from the newly loaded translation unit to the existing + symbol, if any. @code{static} symbols are special and are handled by + setting the second word in the .non_lazy_symbol_pointer data + structure to symbol. See indirect_data for the code that handles + the extra indirection, and machopic_output_indirection and its use + of MACHO_SYMBOL_STATIC for the code that handles @code{static} + symbol indirection. */ + +/* For darwin >= 9 (OSX 10.5) the linker is capable of making the necessary + branch islands and we no longer need to emit darwin stubs. + However, if we are generating code for earlier systems (or for use in the + kernel) the stubs might still be required, and this will be set true. */ +int darwin_emit_branch_islands = false; + +typedef struct GTY(()) cdtor_record { + rtx symbol; + int priority; /* [con/de]structor priority */ + int position; /* original position */ +} cdtor_record; + +static GTY(()) vec *ctors = NULL; +static GTY(()) vec *dtors = NULL; + +/* A flag to determine whether we are running c++ or obj-c++. This has to be + settable from non-c-family contexts too (i.e. we can't use the c_dialect_ + functions). */ +int darwin_running_cxx; + +/* Some code-gen now depends on OS major version numbers (at least). */ +int generating_for_darwin_version ; + +/* Section names. */ +section * darwin_sections[NUM_DARWIN_SECTIONS]; + +/* While we transition to using in-tests instead of ifdef'd code. */ +#ifndef HAVE_lo_sum +#define HAVE_lo_sum 0 +#define gen_macho_high(a,b) (a) +#define gen_macho_low(a,b,c) (a) +#endif + +/* True if we're setting __attribute__ ((ms_struct)). */ +int darwin_ms_struct = false; + +/* Earlier versions of Darwin as do not recognize an alignment field in + .comm directives, this should be set for versions that allow it. */ +int emit_aligned_common = false; + +/* A get_unnamed_section callback used to switch to an ObjC section. + DIRECTIVE is as for output_section_asm_op. */ + +static void +output_objc_section_asm_op (const void *directive) +{ + static bool been_here = false; + + /* The NeXT ObjC Runtime requires these sections to be present and in + order in the object. The code below implements this by emitting + a section header for each ObjC section the first time that an ObjC + section is requested. */ + if (! 
been_here) + { + section *saved_in_section = in_section; + static const enum darwin_section_enum tomark[] = + { + /* written, cold -> hot */ + objc_cat_cls_meth_section, + objc_cat_inst_meth_section, + objc_string_object_section, + objc_constant_string_object_section, + objc_selector_refs_section, + objc_selector_fixup_section, + objc_cls_refs_section, + objc_class_section, + objc_meta_class_section, + /* shared, hot -> cold */ + objc_cls_meth_section, + objc_inst_meth_section, + objc_protocol_section, + objc_class_names_section, + objc_meth_var_types_section, + objc_meth_var_names_section, + objc_category_section, + objc_class_vars_section, + objc_instance_vars_section, + objc_module_info_section, + objc_symbols_section, + }; + /* ABI=1 */ + static const enum darwin_section_enum tomarkv1[] = + { + objc1_protocol_ext_section, + objc1_class_ext_section, + objc1_prop_list_section + } ; + /* ABI=2 */ + static const enum darwin_section_enum tomarkv2[] = + { + objc2_message_refs_section, + objc2_classdefs_section, + objc2_metadata_section, + objc2_classrefs_section, + objc2_classlist_section, + objc2_categorylist_section, + objc2_selector_refs_section, + objc2_nonlazy_class_section, + objc2_nonlazy_category_section, + objc2_protocollist_section, + objc2_protocolrefs_section, + objc2_super_classrefs_section, + objc2_image_info_section, + objc2_constant_string_object_section + } ; + size_t i; + + been_here = true; + if (flag_objc_abi < 2) + { + for (i = 0; i < ARRAY_SIZE (tomark); i++) + switch_to_section (darwin_sections[tomark[i]]); + if (flag_objc_abi == 1) + for (i = 0; i < ARRAY_SIZE (tomarkv1); i++) + switch_to_section (darwin_sections[tomarkv1[i]]); + } + else + for (i = 0; i < ARRAY_SIZE (tomarkv2); i++) + switch_to_section (darwin_sections[tomarkv2[i]]); + /* Make sure we don't get varasm.c out of sync with us. */ + switch_to_section (saved_in_section); + } + output_section_asm_op (directive); +} + + +/* Private flag applied to disable section-anchors in a particular section. */ +#define SECTION_NO_ANCHOR SECTION_MACH_DEP + + +/* Implement TARGET_ASM_INIT_SECTIONS. */ + +void +darwin_init_sections (void) +{ +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) \ + darwin_sections[NAME] = \ + get_unnamed_section (FLAGS, (OBJC \ + ? output_objc_section_asm_op \ + : output_section_asm_op), \ + "\t" DIRECTIVE); +#include "config/darwin-sections.def" +#undef DEF_SECTION + + readonly_data_section = darwin_sections[const_section]; + exception_section = darwin_sections[darwin_exception_section]; + eh_frame_section = darwin_sections[darwin_eh_frame_section]; +} + +int +name_needs_quotes (const char *name) +{ + int c; + while ((c = *name++) != '\0') + if (! ISIDNUM (c) + && c != '.' && c != '$' && c != '_' ) + return 1; + return 0; +} + +/* Return true if SYM_REF can be used without an indirection. */ +int +machopic_symbol_defined_p (rtx sym_ref) +{ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_DEFINED) + return true; + + /* If a symbol references local and is not an extern to this + file, then the symbol might be able to declared as defined. */ + if (SYMBOL_REF_LOCAL_P (sym_ref) && ! SYMBOL_REF_EXTERNAL_P (sym_ref)) + { + /* If the symbol references a variable and the variable is a + common symbol, then this symbol is not defined. 
*/ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_VARIABLE) + { + tree decl = SYMBOL_REF_DECL (sym_ref); + if (!decl) + return true; + if (DECL_COMMON (decl)) + return false; + } + return true; + } + return false; +} + +/* This module assumes that (const (symbol_ref "foo")) is a legal pic + reference, which will not be changed. */ + +enum machopic_addr_class +machopic_classify_symbol (rtx sym_ref) +{ + bool function_p; + + function_p = SYMBOL_REF_FUNCTION_P (sym_ref); + if (machopic_symbol_defined_p (sym_ref)) + return (function_p + ? MACHOPIC_DEFINED_FUNCTION : MACHOPIC_DEFINED_DATA); + else + return (function_p + ? MACHOPIC_UNDEFINED_FUNCTION : MACHOPIC_UNDEFINED_DATA); +} + +#ifndef TARGET_FIX_AND_CONTINUE +#define TARGET_FIX_AND_CONTINUE 0 +#endif + +/* Indicate when fix-and-continue style code generation is being used + and when a reference to data should be indirected so that it can be + rebound in a new translation unit to reference the original instance + of that data. Symbol names that are for code generation local to + the translation unit are bound to the new translation unit; + currently this means symbols that begin with L or _OBJC_; + otherwise, we indicate that an indirect reference should be made to + permit the runtime to rebind new instances of the translation unit + to the original instance of the data. */ + +static int +indirect_data (rtx sym_ref) +{ + int lprefix; + const char *name; + + /* If we aren't generating fix-and-continue code, don't do anything + special. */ + if (TARGET_FIX_AND_CONTINUE == 0) + return 0; + + /* Otherwise, all symbol except symbols that begin with L or _OBJC_ + are indirected. Symbols that begin with L and _OBJC_ are always + bound to the current translation unit as they are used for + generated local data of the translation unit. */ + + name = XSTR (sym_ref, 0); + + lprefix = (((name[0] == '*' || name[0] == '&') + && (name[1] == 'L' || (name[1] == '"' && name[2] == 'L'))) + || (strncmp (name, "_OBJC_", 6) == 0)); + + return ! lprefix; +} + +static int +machopic_data_defined_p (rtx sym_ref) +{ + if (indirect_data (sym_ref)) + return 0; + + switch (machopic_classify_symbol (sym_ref)) + { + case MACHOPIC_DEFINED_DATA: + case MACHOPIC_DEFINED_FUNCTION: + return 1; + default: + return 0; + } +} + +void +machopic_define_symbol (rtx mem) +{ + rtx sym_ref; + + gcc_assert (GET_CODE (mem) == MEM); + sym_ref = XEXP (mem, 0); + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; +} + +/* Return either ORIG or: + + (const:P (unspec:P [ORIG] UNSPEC_MACHOPIC_OFFSET)) + + depending on MACHO_DYNAMIC_NO_PIC_P. */ +rtx +machopic_gen_offset (rtx orig) +{ + if (MACHO_DYNAMIC_NO_PIC_P) + return orig; + else + { + /* Play games to avoid marking the function as needing pic if we + are being called as part of the cost-estimation process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + orig = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), + UNSPEC_MACHOPIC_OFFSET); + return gen_rtx_CONST (Pmode, orig); + } +} + +static GTY(()) const char * function_base_func_name; +static GTY(()) int current_pic_label_num; +static GTY(()) int emitted_pic_label_num; + +static void +update_pic_label_number_if_needed (void) +{ + const char *current_name; + + /* When we are generating _get_pc thunks within stubs, there is no current + function. 
*/ + if (current_function_decl) + { + current_name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)); + if (function_base_func_name != current_name) + { + ++current_pic_label_num; + function_base_func_name = current_name; + } + } + else + { + ++current_pic_label_num; + function_base_func_name = "L_machopic_stub_dummy"; + } +} + +void +machopic_output_function_base_name (FILE *file) +{ + /* If dynamic-no-pic is on, we should not get here. */ + gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + + update_pic_label_number_if_needed (); + fprintf (file, "L%d$pb", current_pic_label_num); +} + +char curr_picbasename[32]; + +const char * +machopic_get_function_picbase (void) +{ + /* If dynamic-no-pic is on, we should not get here. */ + gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + + update_pic_label_number_if_needed (); + snprintf (curr_picbasename, 32, "L%d$pb", current_pic_label_num); + return (const char *) curr_picbasename; +} + +bool +machopic_should_output_picbase_label (void) +{ + update_pic_label_number_if_needed (); + + if (current_pic_label_num == emitted_pic_label_num) + return false; + + emitted_pic_label_num = current_pic_label_num; + return true; +} + +/* The suffix attached to non-lazy pointer symbols. */ +#define NON_LAZY_POINTER_SUFFIX "$non_lazy_ptr" +/* The suffix attached to stub symbols. */ +#define STUB_SUFFIX "$stub" + +typedef struct GTY (()) machopic_indirection +{ + /* The SYMBOL_REF for the entity referenced. */ + rtx symbol; + /* The name of the stub or non-lazy pointer. */ + const char * ptr_name; + /* True iff this entry is for a stub (as opposed to a non-lazy + pointer). */ + bool stub_p; + /* True iff this stub or pointer pointer has been referenced. */ + bool used; +} machopic_indirection; + +/* A table mapping stub names and non-lazy pointer names to + SYMBOL_REFs for the stubbed-to and pointed-to entities. */ + +static GTY ((param_is (struct machopic_indirection))) htab_t + machopic_indirections; + +/* Return a hash value for a SLOT in the indirections hash table. */ + +static hashval_t +machopic_indirection_hash (const void *slot) +{ + const machopic_indirection *p = (const machopic_indirection *) slot; + return htab_hash_string (p->ptr_name); +} + +/* Returns true if the KEY is the same as that associated with + SLOT. */ + +static int +machopic_indirection_eq (const void *slot, const void *key) +{ + return strcmp (((const machopic_indirection *) slot)->ptr_name, + (const char *) key) == 0; +} + +/* Return the name of the non-lazy pointer (if STUB_P is false) or + stub (if STUB_B is true) corresponding to the given name. 
*/ + +const char * +machopic_indirection_name (rtx sym_ref, bool stub_p) +{ + char *buffer; + const char *name = XSTR (sym_ref, 0); + size_t namelen = strlen (name); + machopic_indirection *p; + void ** slot; + bool needs_quotes; + const char *suffix; + const char *prefix = user_label_prefix; + const char *quote = ""; + tree id; + + id = maybe_get_identifier (name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + { + name = IDENTIFIER_POINTER (id); + namelen = strlen (name); + } + } + + if (name[0] == '*') + { + prefix = ""; + ++name; + --namelen; + } + + needs_quotes = name_needs_quotes (name); + if (needs_quotes) + { + quote = "\""; + } + + if (stub_p) + suffix = STUB_SUFFIX; + else + suffix = NON_LAZY_POINTER_SUFFIX; + + buffer = XALLOCAVEC (char, strlen ("&L") + + strlen (prefix) + + namelen + + strlen (suffix) + + 2 * strlen (quote) + + 1 /* '\0' */); + + /* Construct the name of the non-lazy pointer or stub. */ + sprintf (buffer, "&%sL%s%s%s%s", quote, prefix, name, suffix, quote); + + if (!machopic_indirections) + machopic_indirections = htab_create_ggc (37, + machopic_indirection_hash, + machopic_indirection_eq, + /*htab_del=*/NULL); + + slot = htab_find_slot_with_hash (machopic_indirections, buffer, + htab_hash_string (buffer), INSERT); + if (*slot) + { + p = (machopic_indirection *) *slot; + } + else + { + p = ggc_alloc_machopic_indirection (); + p->symbol = sym_ref; + p->ptr_name = xstrdup (buffer); + p->stub_p = stub_p; + p->used = false; + *slot = p; + } + + return p->ptr_name; +} + +/* Return the name of the stub for the mcount function. */ + +const char* +machopic_mcount_stub_name (void) +{ + rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount"); + return machopic_indirection_name (symbol, /*stub_p=*/true); +} + +/* If NAME is the name of a stub or a non-lazy pointer , mark the stub + or non-lazy pointer as used -- and mark the object to which the + pointer/stub refers as used as well, since the pointer/stub will + emit a reference to it. */ + +void +machopic_validate_stub_or_non_lazy_ptr (const char *name) +{ + machopic_indirection *p; + + p = ((machopic_indirection *) + (htab_find_with_hash (machopic_indirections, name, + htab_hash_string (name)))); + if (p && ! p->used) + { + const char *real_name; + tree id; + + p->used = true; + + /* Do what output_addr_const will do when we actually call it. */ + if (SYMBOL_REF_DECL (p->symbol)) + mark_decl_referenced (SYMBOL_REF_DECL (p->symbol)); + + real_name = targetm.strip_name_encoding (XSTR (p->symbol, 0)); + + id = maybe_get_identifier (real_name); + if (id) + mark_referenced (id); + } +} + +/* Transform ORIG, which may be any data source, to the corresponding + source using indirections. */ + +rtx +machopic_indirect_data_reference (rtx orig, rtx reg) +{ + rtx ptr_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + if (GET_CODE (orig) == SYMBOL_REF) + { + int defined = machopic_data_defined_p (orig); + + if (defined && MACHO_DYNAMIC_NO_PIC_P) + { + if (DARWIN_PPC) + { + /* Create a new register for CSE opportunities. */ + rtx hi_reg = (!can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode)); + emit_insn (gen_macho_high (hi_reg, orig)); + emit_insn (gen_macho_low (reg, hi_reg, orig)); + return reg; + } + else if (DARWIN_X86) + return orig; + else + /* some other cpu -- writeme! 
*/ + gcc_unreachable (); + } + else if (defined) + { + rtx offset = NULL; + if (DARWIN_PPC || HAVE_lo_sum) + offset = machopic_gen_offset (orig); + + if (DARWIN_PPC) + { + rtx hi_sum_reg = (!can_create_pseudo_p () + ? reg + : gen_reg_rtx (Pmode)); + + gcc_assert (reg); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, offset)))); + emit_insn (gen_rtx_SET (Pmode, reg, + gen_rtx_LO_SUM (Pmode, hi_sum_reg, + copy_rtx (offset)))); + + orig = reg; + } + else if (HAVE_lo_sum) + { + gcc_assert (reg); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + emit_use (pic_offset_table_rtx); + + orig = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, reg); + } + return orig; + } + + ptr_ref = (gen_rtx_SYMBOL_REF + (Pmode, + machopic_indirection_name (orig, /*stub_p=*/false))); + + SYMBOL_REF_DATA (ptr_ref) = SYMBOL_REF_DATA (orig); + + ptr_ref = gen_const_mem (Pmode, ptr_ref); + machopic_define_symbol (ptr_ref); + + if (DARWIN_X86 + && reg + && MACHO_DYNAMIC_NO_PIC_P) + { + emit_insn (gen_rtx_SET (Pmode, reg, ptr_ref)); + ptr_ref = reg; + } + + return ptr_ref; + } + else if (GET_CODE (orig) == CONST) + { + /* If "(const (plus ...", walk the PLUS and return that result. + PLUS processing (below) will restore the "(const ..." if + appropriate. */ + if (GET_CODE (XEXP (orig, 0)) == PLUS) + return machopic_indirect_data_reference (XEXP (orig, 0), reg); + else + return orig; + } + else if (GET_CODE (orig) == MEM) + { + XEXP (ptr_ref, 0) = + machopic_indirect_data_reference (XEXP (orig, 0), reg); + return ptr_ref; + } + else if (GET_CODE (orig) == PLUS) + { + rtx base, result; + /* When the target is i386, this code prevents crashes due to the + compiler's ignorance on how to move the PIC base register to + other registers. (The reload phase sometimes introduces such + insns.) */ + if (GET_CODE (XEXP (orig, 0)) == REG + && REGNO (XEXP (orig, 0)) == PIC_OFFSET_TABLE_REGNUM + /* Prevent the same register from being erroneously used + as both the base and index registers. */ + && (DARWIN_X86 && (GET_CODE (XEXP (orig, 1)) == CONST)) + && reg) + { + emit_move_insn (reg, XEXP (orig, 0)); + XEXP (ptr_ref, 0) = reg; + return ptr_ref; + } + + /* Legitimize both operands of the PLUS. */ + base = machopic_indirect_data_reference (XEXP (orig, 0), reg); + orig = machopic_indirect_data_reference (XEXP (orig, 1), + (base == reg ? 0 : reg)); + if (MACHOPIC_INDIRECT && (GET_CODE (orig) == CONST_INT)) + result = plus_constant (Pmode, base, INTVAL (orig)); + else + result = gen_rtx_PLUS (Pmode, base, orig); + + if (MACHOPIC_JUST_INDIRECT && GET_CODE (base) == MEM) + { + if (reg) + { + emit_move_insn (reg, result); + result = reg; + } + else + { + result = force_reg (GET_MODE (result), result); + } + } + + return result; + } + return ptr_ref; +} + +/* Transform TARGET (a MEM), which is a function call target, to the + corresponding symbol_stub if necessary. Return a new MEM. */ + +rtx +machopic_indirect_call_target (rtx target) +{ + if (! 
darwin_emit_branch_islands) + return target; + + if (GET_CODE (target) != MEM) + return target; + + if (MACHOPIC_INDIRECT + && GET_CODE (XEXP (target, 0)) == SYMBOL_REF + && !(SYMBOL_REF_FLAGS (XEXP (target, 0)) + & MACHO_SYMBOL_FLAG_DEFINED)) + { + rtx sym_ref = XEXP (target, 0); + const char *stub_name = machopic_indirection_name (sym_ref, + /*stub_p=*/true); + enum machine_mode mode = GET_MODE (sym_ref); + + XEXP (target, 0) = gen_rtx_SYMBOL_REF (mode, stub_name); + SYMBOL_REF_DATA (XEXP (target, 0)) = SYMBOL_REF_DATA (sym_ref); + MEM_READONLY_P (target) = 1; + MEM_NOTRAP_P (target) = 1; + } + + return target; +} + +rtx +machopic_legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + rtx pic_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + /* First handle a simple SYMBOL_REF or LABEL_REF */ + if (GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF + )) + { + /* addr(foo) = &func+(foo-func) */ + orig = machopic_indirect_data_reference (orig, reg); + + if (GET_CODE (orig) == PLUS + && GET_CODE (XEXP (orig, 0)) == REG) + { + if (reg == 0) + return force_reg (mode, orig); + + emit_move_insn (reg, orig); + return reg; + } + + if (GET_CODE (orig) == MEM) + { + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + +#if HAVE_lo_sum + if (MACHO_DYNAMIC_NO_PIC_P + && (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF)) + { +#if defined (TARGET_TOC) /* ppc */ + rtx temp_reg = (!can_create_pseudo_p () + ? reg : + gen_reg_rtx (Pmode)); + rtx asym = XEXP (orig, 0); + rtx mem; + + emit_insn (gen_macho_high (temp_reg, asym)); + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, temp_reg, + copy_rtx (asym))); + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +#else + /* Some other CPU -- WriteMe! but right now there are no other + platforms that can use dynamic-no-pic */ + gcc_unreachable (); +#endif + pic_ref = reg; + } + else + if (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + { + rtx offset = machopic_gen_offset (XEXP (orig, 0)); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + /* Generating a new reg may expose opportunities for + common subexpression elimination. */ + rtx hi_sum_reg = (!can_create_pseudo_p () + ? reg + : gen_reg_rtx (Pmode)); + rtx mem; + rtx insn; + rtx sum; + + sum = gen_rtx_HIGH (Pmode, offset); + if (! 
MACHO_DYNAMIC_NO_PIC_P) + sum = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, sum); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, sum)); + + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset))); + insn = emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); + set_unique_reg_note (insn, REG_EQUAL, pic_ref); + + pic_ref = reg; +#else + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, + gen_rtx_CONST (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + gen_rtx_CONST (Pmode, + copy_rtx (offset))))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); +#endif + + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, pic, + machopic_gen_offset (XEXP (orig, 0))); + } + +#if !defined (TARGET_TOC) + emit_move_insn (reg, pic_ref); + pic_ref = gen_const_mem (GET_MODE (orig), reg); +#endif + } + else + { + +#if HAVE_lo_sum + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx offset = machopic_gen_offset (orig); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + rtx hi_sum_reg; + + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + + hi_sum_reg = reg; + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + (MACHO_DYNAMIC_NO_PIC_P) + ? gen_rtx_HIGH (Pmode, offset) + : gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset)))); + pic_ref = reg; +#else + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + if (REG_P (orig) + || GET_CODE (orig) == SUBREG) + { + return orig; + } + else + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (pic_offset_table_rtx); +#endif + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, + pic, + machopic_gen_offset (orig)); + } + } + } + + if (GET_CODE (pic_ref) != REG) + { + if (reg != 0) + { + emit_move_insn (reg, pic_ref); + return reg; + } + else + { + return force_reg (mode, pic_ref); + } + } + else + { + return pic_ref; + } + } + + else if (GET_CODE (orig) == SYMBOL_REF) + return orig; + + else if (GET_CODE (orig) == PLUS + && (GET_CODE (XEXP (orig, 0)) == MEM + || GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + && XEXP (orig, 0) != pic_offset_table_rtx + && GET_CODE (XEXP (orig, 1)) != REG) + + { + rtx base; + int is_complex = (GET_CODE (XEXP (orig, 0)) == MEM); + + base = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + orig = machopic_legitimize_pic_address (XEXP (orig, 1), + Pmode, (base == reg ? 
0 : reg)); + if (GET_CODE (orig) == CONST_INT) + { + pic_ref = plus_constant (Pmode, base, INTVAL (orig)); + is_complex = 1; + } + else + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + + if (reg && is_complex) + { + emit_move_insn (reg, pic_ref); + pic_ref = reg; + } + /* Likewise, should we set special REG_NOTEs here? */ + } + + else if (GET_CODE (orig) == CONST) + { + return machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + } + + else if (GET_CODE (orig) == MEM + && GET_CODE (XEXP (orig, 0)) == SYMBOL_REF) + { + rtx addr = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + addr = replace_equiv_address (orig, addr); + emit_move_insn (reg, addr); + pic_ref = reg; + } + + return pic_ref; +} + +/* Output the stub or non-lazy pointer in *SLOT, if it has been used. + DATA is the FILE* for assembly output. Called from + htab_traverse. */ + +static int +machopic_output_indirection (void **slot, void *data) +{ + machopic_indirection *p = *((machopic_indirection **) slot); + FILE *asm_out_file = (FILE *) data; + rtx symbol; + const char *sym_name; + const char *ptr_name; + + if (!p->used) + return 1; + + symbol = p->symbol; + sym_name = XSTR (symbol, 0); + ptr_name = p->ptr_name; + + if (p->stub_p) + { + char *sym; + char *stub; + tree id; + + id = maybe_get_identifier (sym_name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + sym_name = IDENTIFIER_POINTER (id); + } + + sym = XALLOCAVEC (char, strlen (sym_name) + 2); + if (sym_name[0] == '*' || sym_name[0] == '&') + strcpy (sym, sym_name + 1); + else if (sym_name[0] == '-' || sym_name[0] == '+') + strcpy (sym, sym_name); + else + sprintf (sym, "%s%s", user_label_prefix, sym_name); + + stub = XALLOCAVEC (char, strlen (ptr_name) + 2); + if (ptr_name[0] == '*' || ptr_name[0] == '&') + strcpy (stub, ptr_name + 1); + else + sprintf (stub, "%s%s", user_label_prefix, ptr_name); + + machopic_output_stub (asm_out_file, sym, stub); + } + else if (! indirect_data (symbol) + && (machopic_symbol_defined_p (symbol) + || SYMBOL_REF_LOCAL_P (symbol))) + { + switch_to_section (data_section); + assemble_align (GET_MODE_ALIGNMENT (Pmode)); + assemble_label (asm_out_file, ptr_name); + assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name), + GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + else + { + rtx init = const0_rtx; + + switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]); + + /* Mach-O symbols are passed around in code through indirect + references and the original symbol_ref hasn't passed through + the generic handling and reference-catching in + output_operand, so we need to manually mark weak references + as such. */ + if (SYMBOL_REF_WEAK (symbol)) + { + tree decl = SYMBOL_REF_DECL (symbol); + gcc_assert (DECL_P (decl)); + + if (decl != NULL_TREE + && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) + /* Handle only actual external-only definitions, not + e.g. extern inline code or variables for which + storage has been allocated. 
*/ + && !TREE_STATIC (decl)) + { + fputs ("\t.weak_reference ", asm_out_file); + assemble_name (asm_out_file, sym_name); + fputc ('\n', asm_out_file); + } + } + + assemble_name (asm_out_file, ptr_name); + fprintf (asm_out_file, ":\n"); + + fprintf (asm_out_file, "\t.indirect_symbol "); + assemble_name (asm_out_file, sym_name); + fprintf (asm_out_file, "\n"); + + /* Variables that are marked with MACHO_SYMBOL_STATIC need to + have their symbol name instead of 0 in the second entry of + the non-lazy symbol pointer data structure when they are + defined. This allows the runtime to rebind newer instances + of the translation unit with the original instance of the + symbol. */ + + if ((SYMBOL_REF_FLAGS (symbol) & MACHO_SYMBOL_STATIC) + && machopic_symbol_defined_p (symbol)) + init = gen_rtx_SYMBOL_REF (Pmode, sym_name); + + assemble_integer (init, GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + + return 1; +} + +void +machopic_finish (FILE *asm_out_file) +{ + if (machopic_indirections) + htab_traverse_noresize (machopic_indirections, + machopic_output_indirection, + asm_out_file); +} + +int +machopic_operand_p (rtx op) +{ + if (MACHOPIC_JUST_INDIRECT) + return (GET_CODE (op) == SYMBOL_REF + && machopic_symbol_defined_p (op)); + else + return (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == UNSPEC + && XINT (XEXP (op, 0), 1) == UNSPEC_MACHOPIC_OFFSET); +} + +/* This function records whether a given name corresponds to a defined + or undefined function or variable, for machopic_classify_ident to + use later. */ + +void +darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) +{ + rtx sym_ref; + + /* Do the standard encoding things first. */ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) != FUNCTION_DECL && TREE_CODE (decl) != VAR_DECL) + return; + + sym_ref = XEXP (rtl, 0); + if (TREE_CODE (decl) == VAR_DECL) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_VARIABLE; + + if (!DECL_EXTERNAL (decl) + && (!TREE_PUBLIC (decl) || !DECL_WEAK (decl)) + && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (decl)) + && ((TREE_STATIC (decl) + && (!DECL_COMMON (decl) || !TREE_PUBLIC (decl))) + || (!DECL_COMMON (decl) && DECL_INITIAL (decl) + && DECL_INITIAL (decl) != error_mark_node))) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; + + if (! TREE_PUBLIC (decl)) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_STATIC; +} + +void +darwin_mark_decl_preserved (const char *name) +{ + fprintf (asm_out_file, "\t.no_dead_strip "); + assemble_name (asm_out_file, name); + fputc ('\n', asm_out_file); +} + +static section * +darwin_rodata_section (int weak, bool zsize) +{ + return (weak + ? darwin_sections[const_coal_section] + : (zsize ? darwin_sections[zobj_const_section] + : darwin_sections[const_section])); +} + +static section * +darwin_mergeable_string_section (tree exp, + unsigned HOST_WIDE_INT align) +{ + /* Darwin's ld expects to see non-writable string literals in the .cstring + section. Later versions of ld check and complain when CFStrings are + enabled. Therefore we shall force the strings into .cstring since we + don't support writable ones anyway. 
*/ + if ((darwin_constant_cfstrings || flag_merge_constants) + && TREE_CODE (exp) == STRING_CST + && TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE + && align <= 256 + && (int_size_in_bytes (TREE_TYPE (exp)) + == TREE_STRING_LENGTH (exp)) + && ((size_t) TREE_STRING_LENGTH (exp) + == strlen (TREE_STRING_POINTER (exp)) + 1)) + return darwin_sections[cstring_section]; + + if (DARWIN_SECTION_ANCHORS && flag_section_anchors + && TREE_CODE (exp) == STRING_CST + && TREE_STRING_LENGTH (exp) == 0) + return darwin_sections[zobj_const_section]; + + return readonly_data_section; +} + +#ifndef HAVE_GAS_LITERAL16 +#define HAVE_GAS_LITERAL16 0 +#endif + +static section * +darwin_mergeable_constant_section (tree exp, + unsigned HOST_WIDE_INT align, + bool zsize) +{ + enum machine_mode mode = DECL_MODE (exp); + unsigned int modesize = GET_MODE_BITSIZE (mode); + + if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + return darwin_sections[zobj_const_section]; + + if (flag_merge_constants + && mode != VOIDmode + && mode != BLKmode + && modesize <= align + && align >= 8 + && align <= 256 + && (align & (align -1)) == 0) + { + tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp)); + + if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 4 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal4_section]; + else if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 8 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal8_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 16 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal16_section]; + else + return readonly_data_section; + } + + return readonly_data_section; +} + +section * +darwin_tm_clone_table_section (void) +{ + return get_named_section (NULL, + "__DATA,__tm_clone_table,regular,no_dead_strip", + 3); +} + +int +machopic_reloc_rw_mask (void) +{ + return MACHOPIC_INDIRECT ? 3 : 0; +} + +/* We have to deal with ObjC/C++ metadata section placement in the common + code, since it will also be called from LTO. + + Return metadata attributes, if present (searching for ABI=2 first) + Return NULL_TREE if no such attributes are found. */ + +static tree +is_objc_metadata (tree decl) +{ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + } + return NULL_TREE; +} + +static int classes_seen; +static int objc_metadata_seen; + +/* Return the section required for Objective C ABI 2 metadata. */ +static section * +darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2); + + objc_metadata_seen = 1; + + if (base == data_section) + base = darwin_sections[objc2_metadata_section]; + + /* Most of the OBJC2 META-data end up in the base section, so check it + first. 
*/ + if (!strncmp (p, "V2_BASE", 7)) + return base; + else if (!strncmp (p, "V2_STRG", 7)) + return darwin_sections[cstring_section]; + + else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7)) + return darwin_sections[objc2_classdefs_section]; + else if (!strncmp (p, "V2_MREF", 7)) + return darwin_sections[objc2_message_refs_section]; + else if (!strncmp (p, "V2_CLRF", 7)) + return darwin_sections[objc2_classrefs_section]; + else if (!strncmp (p, "V2_SURF", 7)) + return darwin_sections[objc2_super_classrefs_section]; + else if (!strncmp (p, "V2_NLCL", 7)) + return darwin_sections[objc2_nonlazy_class_section]; + else if (!strncmp (p, "V2_CLAB", 7)) + { + classes_seen = 1; + return darwin_sections[objc2_classlist_section]; + } + else if (!strncmp (p, "V2_SRFS", 7)) + return darwin_sections[objc2_selector_refs_section]; + else if (!strncmp (p, "V2_NLCA", 7)) + return darwin_sections[objc2_nonlazy_category_section]; + else if (!strncmp (p, "V2_CALA", 7)) + return darwin_sections[objc2_categorylist_section]; + + else if (!strncmp (p, "V2_PLST", 7)) + return darwin_sections[objc2_protocollist_section]; + else if (!strncmp (p, "V2_PRFS", 7)) + return darwin_sections[objc2_protocolrefs_section]; + + else if (!strncmp (p, "V2_INFO", 7)) + return darwin_sections[objc2_image_info_section]; + + else if (!strncmp (p, "V2_EHTY", 7)) + return darwin_sections[data_coal_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc2_constant_string_object_section]; + + /* Not recognized, default. */ + return base; +} + +/* Return the section required for Objective C ABI 0/1 metadata. */ +static section * +darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2); + + objc_metadata_seen = 1; + + /* String sections first, cos there are lots of strings. 
*/ + if (!strncmp (p, "V1_STRG", 7)) + return darwin_sections[cstring_section]; + else if (!strncmp (p, "V1_CLSN", 7)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (p, "V1_METN", 7)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (p, "V1_METT", 7)) + return darwin_sections[objc_meth_var_types_section]; + + else if (!strncmp (p, "V1_CLAS", 7)) + { + classes_seen = 1; + return darwin_sections[objc_class_section]; + } + else if (!strncmp (p, "V1_META", 7)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (p, "V1_CATG", 7)) + return darwin_sections[objc_category_section]; + else if (!strncmp (p, "V1_PROT", 7)) + return darwin_sections[objc_protocol_section]; + + else if (!strncmp (p, "V1_CLCV", 7)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (p, "V1_CLIV", 7)) + return darwin_sections[objc_instance_vars_section]; + + else if (!strncmp (p, "V1_CLCM", 7)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (p, "V1_CLIM", 7)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (p, "V1_CACM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAIM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PNSM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PCLM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + + else if (!strncmp (p, "V1_CLPR", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAPR", 7)) + return darwin_sections[objc_category_section]; /* ??? CHECK me. */ + + else if (!strncmp (p, "V1_PRFS", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CLRF", 7)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (p, "V1_SRFS", 7)) + return darwin_sections[objc_selector_refs_section]; + + else if (!strncmp (p, "V1_MODU", 7)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (p, "V1_SYMT", 7)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (p, "V1_INFO", 7)) + return darwin_sections[objc_image_info_section]; + + else if (!strncmp (p, "V1_PLST", 7)) + return darwin_sections[objc1_prop_list_section]; + else if (!strncmp (p, "V1_PEXT", 7)) + return darwin_sections[objc1_protocol_ext_section]; + else if (!strncmp (p, "V1_CEXT", 7)) + return darwin_sections[objc1_class_ext_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc_constant_string_object_section]; + + return base; +} + +section * +machopic_select_section (tree decl, + int reloc, + unsigned HOST_WIDE_INT align) +{ + bool zsize, one, weak, ro; + section *base_section = NULL; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); + + zsize = (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && tree_to_uhwi (DECL_SIZE_UNIT (decl)) == 0); + + one = DECL_P (decl) + && TREE_CODE (decl) == VAR_DECL + && DECL_ONE_ONLY (decl); + + ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ; + + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_TEXT: + gcc_unreachable (); + break; + + case SECCAT_RODATA: + case SECCAT_SRODATA: + base_section = darwin_rodata_section (weak, zsize); + break; + + case SECCAT_RODATA_MERGE_STR: + base_section = darwin_mergeable_string_section (decl, align); + break; + + case SECCAT_RODATA_MERGE_STR_INIT: + 
base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align); + break; + + case SECCAT_RODATA_MERGE_CONST: + base_section = darwin_mergeable_constant_section (decl, align, zsize); + break; + + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + case SECCAT_SDATA: + case SECCAT_TDATA: + if (weak || one) + { + if (ro) + base_section = darwin_sections[const_data_coal_section]; + else + base_section = darwin_sections[data_coal_section]; + } + else if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + { + /* If we're doing section anchors, then punt zero-sized objects into + their own sections so that they don't interfere with offset + computation for the remaining vars. This does not need to be done + for stuff in mergeable sections, since these are ineligible for + anchors. */ + if (ro) + base_section = darwin_sections[zobj_const_data_section]; + else + base_section = darwin_sections[zobj_data_section]; + } + else if (ro) + base_section = darwin_sections[const_data_section]; + else + base_section = data_section; + break; + case SECCAT_BSS: + case SECCAT_SBSS: + case SECCAT_TBSS: + if (weak || one) + base_section = darwin_sections[data_coal_section]; + else + { + if (!TREE_PUBLIC (decl)) + base_section = lcomm_section; + else if (bss_noswitch_section) + base_section = bss_noswitch_section; + else + base_section = data_section; + } + break; + + default: + gcc_unreachable (); + } + + /* Darwin weird special cases. + a) OBJC Meta-data. */ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL + || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc2_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc1_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1METG", DECL_ATTRIBUTES (decl)); + if (meta) + return base_section; /* GNU runtime is happy with it all in one pot. */ + } + + /* b) Constant string objects. */ + if (TREE_CODE (decl) == CONSTRUCTOR + && TREE_TYPE (decl) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE + && TYPE_NAME (TREE_TYPE (decl))) + { + tree name = TYPE_NAME (TREE_TYPE (decl)); + if (TREE_CODE (name) == TYPE_DECL) + name = DECL_NAME (name); + + if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_ObjCString")) + { + if (flag_next_runtime) + { + if (flag_objc_abi == 2) + return darwin_sections[objc2_constant_string_object_section]; + else + return darwin_sections[objc_constant_string_object_section]; + } + else + return darwin_sections[objc_string_object_section]; + } + else if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_CFString")) + return darwin_sections[cfstring_constant_object_section]; + else + return base_section; + } + /* c) legacy meta-data selection. */ + else if (TREE_CODE (decl) == VAR_DECL + && DECL_NAME (decl) + && TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE + && IDENTIFIER_POINTER (DECL_NAME (decl)) + && flag_next_runtime + && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), "_OBJC_", 6)) + { + const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); + static bool warned_objc_46 = false; + /* We shall assert that zero-sized objects are an error in ObjC + meta-data. */ + gcc_assert (tree_to_uhwi (DECL_SIZE_UNIT (decl)) != 0); + + /* ??? 
This mechanism for determining the metadata section is + broken when LTO is in use, since the frontend that generated + the data is not identified. We will keep the capability for + the short term - in case any non-Objective-C programs are using + it to place data in specified sections. */ + if (!warned_objc_46) + { + location_t loc = DECL_SOURCE_LOCATION (decl); + warning_at (loc, 0, "the use of _OBJC_-prefixed variable names" + " to select meta-data sections is deprecated at 4.6" + " and will be removed in 4.7"); + warned_objc_46 = true; + } + + if (!strncmp (name, "_OBJC_CLASS_METHODS_", 20)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_METHODS_", 23)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_VARIABLES_", 22)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_VARIABLES_", 25)) + return darwin_sections[objc_instance_vars_section]; + else if (!strncmp (name, "_OBJC_CLASS_PROTOCOLS_", 22)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_NAME_", 17)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_NAME_", 20)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_TYPE_", 20)) + return darwin_sections[objc_meth_var_types_section]; + else if (!strncmp (name, "_OBJC_CLASS_REFERENCES", 22)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (name, "_OBJC_CLASS_", 12)) + return darwin_sections[objc_class_section]; + else if (!strncmp (name, "_OBJC_METACLASS_", 16)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_", 15)) + return darwin_sections[objc_category_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_REFERENCES", 25)) + return darwin_sections[objc_selector_refs_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_FIXUP", 20)) + return darwin_sections[objc_selector_fixup_section]; + else if (!strncmp (name, "_OBJC_SYMBOLS", 13)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (name, "_OBJC_MODULES", 13)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (name, "_OBJC_IMAGE_INFO", 16)) + return darwin_sections[objc_image_info_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_REFS_", 20)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_", 15)) + return darwin_sections[objc_protocol_section]; + else + return base_section; + } + + return base_section; +} + +/* This can be called with address expressions as "rtx". + They must go in "const". 
*/ + +section * +machopic_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (GET_MODE_SIZE (mode) == 8 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal8_section]; + else if (GET_MODE_SIZE (mode) == 4 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal4_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && GET_MODE_SIZE (mode) == 16 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR)) + return darwin_sections[literal16_section]; + else if (MACHOPIC_INDIRECT + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == CONST + || GET_CODE (x) == LABEL_REF)) + return darwin_sections[const_data_section]; + else + return darwin_sections[const_section]; +} + +void +machopic_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + cdtor_record new_elt = {symbol, priority, vec_safe_length (ctors)}; + + vec_safe_push (ctors, new_elt); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .constructors_used\n"); +} + +void +machopic_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + cdtor_record new_elt = {symbol, priority, vec_safe_length (dtors)}; + + vec_safe_push (dtors, new_elt); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .destructors_used\n"); +} + +static int +sort_cdtor_records (const void * a, const void * b) +{ + const cdtor_record *cda = (const cdtor_record *)a; + const cdtor_record *cdb = (const cdtor_record *)b; + if (cda->priority > cdb->priority) + return 1; + if (cda->priority < cdb->priority) + return -1; + if (cda->position > cdb->position) + return 1; + if (cda->position < cdb->position) + return -1; + return 0; +} + +static void +finalize_ctors () +{ + unsigned int i; + cdtor_record *elt; + + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_init_section]); + else + switch_to_section (darwin_sections[constructor_section]); + + if (vec_safe_length (ctors) > 1) + ctors->qsort (sort_cdtor_records); + FOR_EACH_VEC_SAFE_ELT (ctors, i, elt) + { + assemble_align (POINTER_SIZE); + assemble_integer (elt->symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + } +} + +static void +finalize_dtors () +{ + unsigned int i; + cdtor_record *elt; + + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_term_section]); + else + switch_to_section (darwin_sections[destructor_section]); + + if (vec_safe_length (dtors) > 1) + dtors->qsort (sort_cdtor_records); + FOR_EACH_VEC_SAFE_ELT (dtors, i, elt) + { + assemble_align (POINTER_SIZE); + assemble_integer (elt->symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + } +} + +void +darwin_globalize_label (FILE *stream, const char *name) +{ + if (!!strncmp (name, "_OBJC_", 6)) + default_globalize_label (stream, name); +} + +/* This routine returns non-zero if 'name' starts with the special objective-c + anonymous file-scope static name. It accommodates c++'s mangling of such + symbols (in this case the symbols will have form _ZL{d}*_OBJC_* d=digit). */ + +int +darwin_label_is_anonymous_local_objc_name (const char *name) +{ + const unsigned char *p = (const unsigned char *) name; + if (*p != '_') + return 0; + if (p[1] == 'Z' && p[2] == 'L') + { + p += 3; + while (*p >= '0' && *p <= '9') + p++; + } + return (!strncmp ((const char *)p, "_OBJC_", 6)); +} + +/* LTO support for Mach-O. 
+ + This version uses three mach-o sections to encapsulate the (unlimited + number of) lto sections. + + __GNU_LTO, __lto_sections contains the concatented GNU LTO section data. + __GNU_LTO, __section_names contains the GNU LTO section names. + __GNU_LTO, __section_index contains an array of values that index these. + + Indexed thus: +
<section offset>, <section length>, + <name offset>, <name length>
+ . + + At present, for both m32 and m64 mach-o files each of these fields is + represented by a uint32_t. This is because, AFAICT, a mach-o object + cannot exceed 4Gb because the section_64 offset field (see below) is 32bits. + + uint32_t offset; + "offset An integer specifying the offset to this section in the file." */ + +/* Count lto section numbers. */ +static unsigned int lto_section_num = 0; + +/* A vector of information about LTO sections, at present, we only have + the name. TODO: see if we can get the data length somehow. */ +typedef struct GTY (()) darwin_lto_section_e { + const char *sectname; +} darwin_lto_section_e ; + +static GTY (()) vec *lto_section_names; + +/* Segment for LTO data. */ +#define LTO_SEGMENT_NAME "__GNU_LTO" + +/* Section wrapper scheme (used here to wrap the unlimited number of LTO + sections into three Mach-O ones). + NOTE: These names MUST be kept in sync with those in + libiberty/simple-object-mach-o. */ +#define LTO_SECTS_SECTION "__wrapper_sects" +#define LTO_NAMES_SECTION "__wrapper_names" +#define LTO_INDEX_SECTION "__wrapper_index" + +/* File to temporarily store LTO data. This is appended to asm_out_file + in darwin_end_file. */ +static FILE *lto_asm_out_file, *saved_asm_out_file; +static char *lto_asm_out_name; + +/* Prepare asm_out_file for LTO output. For darwin, this means hiding + asm_out_file and switching to an alternative output file. */ +void +darwin_asm_lto_start (void) +{ + gcc_assert (! saved_asm_out_file); + saved_asm_out_file = asm_out_file; + if (! lto_asm_out_name) + lto_asm_out_name = make_temp_file (".lto.s"); + lto_asm_out_file = fopen (lto_asm_out_name, "a"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s for LTO output", + lto_asm_out_name); + asm_out_file = lto_asm_out_file; +} + +/* Restore asm_out_file. */ +void +darwin_asm_lto_end (void) +{ + gcc_assert (saved_asm_out_file); + fclose (lto_asm_out_file); + asm_out_file = saved_asm_out_file; + saved_asm_out_file = NULL; +} + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, tree decl); + +/* Called for the TARGET_ASM_NAMED_SECTION hook. */ + +void +darwin_asm_named_section (const char *name, + unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + /* LTO sections go in a special section that encapsulates the (unlimited) + number of GNU LTO sections within a single mach-o one. */ + if (strncmp (name, LTO_SECTION_NAME_PREFIX, + strlen (LTO_SECTION_NAME_PREFIX)) == 0) + { + darwin_lto_section_e e; + /* We expect certain flags to be set... */ + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + + /* Switch to our combined section. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + /* Output a label for the start of this sub-section. */ + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# %s\n", + lto_section_num, name); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, "\t.set L$gnu$lto$offs%d,L_GNU_LTO%d-L_GNU_LTO0\n", + lto_section_num, lto_section_num); + fprintf (asm_out_file, + "\t.set L$gnu$lto$size%d,L_GNU_LTO%d-L_GNU_LTO%d\n", + lto_section_num, lto_section_num+1, lto_section_num); + lto_section_num++; + e.sectname = xstrdup (name); + /* Keep the names, we'll need to make a table later. + TODO: check that we do not revisit sections, that would break + the assumption of how this is done. 
*/ + if (lto_section_names == NULL) + vec_alloc (lto_section_names, 16); + vec_safe_push (lto_section_names, e); + } + else if (strncmp (name, "__DWARF,", 8) == 0) + darwin_asm_dwarf_section (name, flags, decl); + else + fprintf (asm_out_file, "\t.section %s\n", name); +} + +void +darwin_unique_section (tree decl ATTRIBUTE_UNUSED, int reloc ATTRIBUTE_UNUSED) +{ + /* Darwin does not use unique sections. */ +} + +/* Handle __attribute__ ((apple_kext_compatibility)). + This only applies to darwin kexts for 2.95 compatibility -- it shrinks the + vtable for classes with this attribute (and their descendants) by not + outputting the new 3.0 nondeleting destructor. This means that such + objects CANNOT be allocated on the stack or as globals UNLESS they have + a completely empty `operator delete'. + Luckily, this fits in with the Darwin kext model. + + This attribute also disables gcc3's potential overlaying of derived + class data members on the padding at the end of the base class. */ + +tree +darwin_handle_kext_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + /* APPLE KEXT stuff -- only applies with pure static C++ code. */ + if (! TARGET_KEXTABI) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only when compiling a kext", name); + + *no_add_attrs = true; + } + else if (TREE_CODE (*node) != RECORD_TYPE) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only to C++ classes", name); + + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "weak_import" attribute; arguments as in + struct attribute_spec.handler. */ + +tree +darwin_handle_weak_import_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool * no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL && TREE_CODE (*node) != VAR_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + else + declare_weak (*node); + + return NULL_TREE; +} + +/* Emit a label for an FDE, making it global and/or weak if appropriate. + The third parameter is nonzero if this is for exception handling. + The fourth parameter is nonzero if this is just a placeholder for an + FDE that we are omitting. */ + +void +darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) +{ + char *lab ; + char buf[32]; + static int invok_count = 0; + static tree last_fun_decl = NULL_TREE; + + /* We use the linker to emit the .eh labels for Darwin 9 and above. */ + if (! for_eh || generating_for_darwin_version >= 9) + return; + + /* FIXME: This only works when the eh for all sections of a function is + emitted at the same time. If that changes, we would need to use a lookup + table of some form to determine what to do. Also, we should emit the + unadorned label for the partition containing the public label for a + function. This is of limited use, probably, since we do not currently + enable partitioning. 
*/ + strcpy (buf, ".eh"); + if (decl && TREE_CODE (decl) == FUNCTION_DECL) + { + if (decl == last_fun_decl) + { + invok_count++; + snprintf (buf, 31, "$$part$$%d.eh", invok_count); + } + else + { + last_fun_decl = decl; + invok_count = 0; + } + } + + lab = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), buf, NULL); + + if (TREE_PUBLIC (decl)) + { + targetm.asm_out.globalize_label (file, lab); + if (DECL_VISIBILITY (decl) == VISIBILITY_HIDDEN) + { + fputs ("\t.private_extern ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + } + + if (DECL_WEAK (decl)) + { + fputs ("\t.weak_definition ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + + assemble_name (file, lab); + if (empty) + { + fputs (" = 0\n", file); + + /* Mark the absolute .eh and .eh1 style labels as needed to + ensure that we don't dead code strip them and keep such + labels from another instantiation point until we can fix this + properly with group comdat support. */ + darwin_mark_decl_preserved (lab); + } + else + fputs (":\n", file); + + free (lab); +} + +static GTY(()) unsigned long except_table_label_num; + +void +darwin_emit_except_table_label (FILE *file) +{ + char section_start_label[30]; + + ASM_GENERATE_INTERNAL_LABEL (section_start_label, "GCC_except_table", + except_table_label_num++); + ASM_OUTPUT_LABEL (file, section_start_label); +} +/* Generate a PC-relative reference to a Mach-O non-lazy-symbol. */ + +void +darwin_non_lazy_pcrel (FILE *file, rtx addr) +{ + const char *nlp_name; + + gcc_assert (GET_CODE (addr) == SYMBOL_REF); + + nlp_name = machopic_indirection_name (addr, /*stub_p=*/false); + fputs ("\t.long\t", file); + ASM_OUTPUT_LABELREF (file, nlp_name); + fputs ("-.", file); +} + +/* If this is uncommented, details of each allocation will be printed + in the asm right before the actual code. WARNING - this will cause some + test-suite fails (since the printout will contain items that some tests + are not expecting) -- so don't leave it on by default (it bloats the + asm too). */ +/*#define DEBUG_DARWIN_MEM_ALLOCATORS*/ + +/* The first two of these routines are ostensibly just intended to put + names into the asm. However, they are both hijacked in order to ensure + that zero-sized items do not make their way into the output. Consequently, + we also need to make these participate in provisions for dealing with + such items in section anchors. */ + +/* The implementation of ASM_DECLARE_OBJECT_NAME. */ +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. 
*/ +void +darwin_asm_declare_object_name (FILE *file, + const char *nam, tree decl) +{ + const char *xname = nam; + unsigned HOST_WIDE_INT size; + bool local_def, weak; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + + local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl) + && (!DECL_COMMON (decl) + || !TREE_PUBLIC (decl))); + + if (GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) + xname = IDENTIFIER_POINTER (DECL_NAME (decl)); + + if (local_def) + { + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); + if (!weak) + machopic_define_symbol (DECL_RTL (decl)); + } + + size = tree_to_uhwi (DECL_SIZE_UNIT (decl)); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" + " stat %d com %d pub %d t-const %d t-ro %d init %lx\n", + xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"), + (unsigned long long)size, DECL_ALIGN (decl), local_def, + DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl), + (unsigned long)DECL_INITIAL (decl)); +#endif + + /* Darwin needs help to support local zero-sized objects. + They must be made at least one byte, and the section containing must be + marked as unsuitable for section-anchors (see storage allocators below). + + For non-zero objects this output is handled by varasm.c. + */ + if (!size) + { + unsigned int l2align = 0; + + /* The align must be honored, even for zero-sized. */ + if (DECL_ALIGN (decl)) + { + l2align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT); + fprintf (file, "\t.align\t%u\n", l2align); + } + + ASM_OUTPUT_LABEL (file, xname); + size = 1; + fprintf (file, "\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } + else + ASM_OUTPUT_LABEL (file, xname); +} + +/* The implementation of ASM_DECLARE_CONSTANT_NAME. */ +void +darwin_asm_declare_constant_name (FILE *file, const char *name, + const_tree exp ATTRIBUTE_UNUSED, + HOST_WIDE_INT size) +{ + assemble_label (file, name); + /* As for other items, we need at least one byte. */ + if (!size) + { + fputs ("\t.space\t1\n", file); + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } +} + +/* Darwin storage allocators. + + Zerofill sections are desirable for large blank data since, otherwise, these + data bloat objects (PR33210). + + However, section anchors don't work in .zerofill sections (one cannot switch + to a zerofill section). Ergo, for Darwin targets using section anchors we need + to put (at least some) data into 'normal' switchable sections. + + Here we set a relatively arbitrary value for the size of an object to trigger + zerofill when section anchors are enabled (anything bigger than a page for + current Darwin implementations). FIXME: there ought to be some objective way + to make this choice. + + When section anchor are off this is ignored anyway. */ + +#define BYTES_ZFILL 4096 + +/* Emit a chunk of data for items coalesced by the linker. 
*/ +static void +darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + /* Since the sections used here are coalesed, they will not be eligible + for section anchors, and therefore we don't need to break that out. */ + if (TREE_READONLY (decl) || TREE_CONSTANT (decl)) + switch_to_section (darwin_sections[const_data_coal_section]); + else + switch_to_section (darwin_sections[data_coal_section]); + + /* To be consistent, we'll allow darwin_asm_declare_object_name to assemble + the align info for zero-sized items... but do it here otherwise. */ + if (size && align) + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + if (TREE_PUBLIC (decl)) + darwin_globalize_label (fp, name); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + if (size) + assemble_zeros (size); +} + +/* Emit a chunk of data for ObjC meta-data that got placed in BSS erroneously. */ +static void +darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, tree meta) +{ + section *ocs = data_section; + + if (TREE_PURPOSE (meta) == get_identifier("OBJC2META")) + ocs = darwin_objc2_section (decl, meta, ocs); + else + ocs = darwin_objc1_section (decl, meta, ocs); + + switch_to_section (ocs); + + /* We shall declare that zero-sized meta-data are not valid (yet). */ + gcc_assert (size); + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + assemble_zeros (size); +} + +/* This routine emits 'local' storage: + + When Section Anchors are off this routine emits .zerofill commands in + sections named for their alignment. + + When Section Anchors are on, smaller (non-zero-sized) items are placed in + the .static_data section so that the section anchoring system can see them. + Larger items are still placed in .zerofill sections, addressing PR33210. + The routine has no checking - it is all assumed to be done by the caller. +*/ +static void +darwin_emit_local_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int l2align) +{ + /* FIXME: We have a fudge to make this work with Java even when the target does + not use sections anchors -- Java seems to need at least one small item in a + non-zerofill segment. */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in _static_data, where the section anchors system + can get them. + However, if they are zero-sized punt them to yet a different section + (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_bss\n", fp); + in_section = darwin_sections[zobj_bss_section]; + size = 1; + } + else + { + fputs ("\t.static_data\n", fp); + in_section = darwin_sections[static_data_section]; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. 
*/ + char secnam[64]; + unsigned int flags ; + snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"), + (unsigned) l2align); + /* We can't anchor (yet, if ever) in zerofill sections, because we can't + switch to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", + size, (unsigned) l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + + (*targetm.encode_section_info) (decl, DECL_RTL (decl), false); + /* This is defined as a file-scope var, so we know to notify machopic. */ + machopic_define_symbol (DECL_RTL (decl)); +} + +/* Emit a chunk of common. */ +static void +darwin_emit_common (FILE *fp, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned HOST_WIDE_INT rounded; + unsigned int l2align; + + /* Earlier systems complain if the alignment exceeds the page size. + The magic number is 4096 * 8 - hard-coded for legacy systems. */ + if (!emit_aligned_common && (align > 32768UL)) + align = 4096UL; /* In units. */ + else + align /= BITS_PER_UNIT; + + /* Make sure we have a meaningful align. */ + if (!align) + align = 1; + + /* For earlier toolchains, we need to emit the var as a rounded size to + tell ld the alignment. */ + if (size < align) + rounded = align; + else + rounded = (size + (align-1)) & ~(align-1); + + l2align = floor_log2 (align); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + in_section = comm_section; + /* We mustn't allow multiple public symbols to share an address when using + the normal OSX toolchain. */ + if (!size) + { + /* Put at least one byte. */ + size = 1; + /* This section can no longer participate in section anchoring. */ + comm_section->common.flags |= SECTION_NO_ANCHOR; + } + + fputs ("\t.comm\t", fp); + assemble_name (fp, name); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED, + emit_aligned_common?size:rounded); + if (l2align && emit_aligned_common) + fprintf (fp, ",%u", l2align); + fputs ("\n", fp); +} + +/* Output a var which is all zero - into aligned BSS sections, common, lcomm + or coalescable data sections (for weak or comdat) as appropriate. */ + +void +darwin_output_aligned_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned int l2align; + bool one, pub, weak; + tree meta; + + pub = TREE_PUBLIC (decl); + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" + " pub %d weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + pub, weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* Check that any initializer is valid. 
*/ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + gcc_assert (!DECL_COMMON (decl)); + + /* Pick up the correct alignment. */ + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + last_assemble_variable_decl = decl; + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergeable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* If this is not public, then emit according to local rules. */ + if (!pub) + { + darwin_emit_local_bss (fp, decl, name, size, l2align); + return; + } + + /* So we have a public symbol (small item fudge for Java, see above). */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in data, where the section anchors system can get + them. However, if they are zero-sized punt them to yet a different + section (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_data\n", fp); + in_section = darwin_sections[zobj_data_section]; + size = 1; + } + else + { + fputs ("\t.data\n", fp); + in_section = data_section; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + char secnam[64]; + unsigned int flags ; + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. */ + snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align); + + /* We can't anchor in zerofill sections, because we can't switch + to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); +} + +/* Output a chunk of common, with alignment specified (where the target + supports this). */ +void +darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned int l2align; + bool one, weak; + tree meta; + + /* No corresponding var. 
*/ + if (decl==NULL) + { +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); +#endif + darwin_emit_common (fp, name, size, align); + return; + } + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" + " weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* We should only get here for DECL_COMMON, with a zero init (and, in + principle, only for public symbols too - although we deal with local + ones below). */ + + /* Check the initializer is OK. */ + gcc_assert (DECL_COMMON (decl) + && ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl)))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + /* Check we aren't asking for more aligment than the platform allows. */ + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + if (TREE_PUBLIC (decl) != 0) + darwin_emit_common (fp, name, size, align); + else + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Output a chunk of BSS with alignment specfied. */ +void +darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned long l2align; + bool one, weak; + tree meta; + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d" + " weak %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), one, TREE_PUBLIC (decl), + weak , (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. 
*/ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* .. and it should be suitable for placement in local mem. */ + gcc_assert(!TREE_PUBLIC (decl) && !DECL_COMMON (decl)); + /* .. and any initializer must be all-zero. */ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Emit an assembler directive to set visibility for a symbol. The + only supported visibilities are VISIBILITY_DEFAULT and + VISIBILITY_HIDDEN; the latter corresponds to Darwin's "private + extern". There is no MACH-O equivalent of ELF's + VISIBILITY_INTERNAL or VISIBILITY_PROTECTED. */ + +void +darwin_assemble_visibility (tree decl, int vis) +{ + if (vis == VISIBILITY_DEFAULT) + ; + else if (vis == VISIBILITY_HIDDEN || vis == VISIBILITY_INTERNAL) + { + fputs ("\t.private_extern ", asm_out_file); + assemble_name (asm_out_file, + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); + fputs ("\n", asm_out_file); + } + else + warning (OPT_Wattributes, "protected visibility attribute " + "not supported in this configuration; ignored"); +} + +/* vec used by darwin_asm_dwarf_section. + Maybe a hash tab would be better here - but the intention is that this is + a very short list (fewer than 16 items) and each entry should (ideally, + eventually) only be presented once. + + A structure to hold a dwarf debug section used entry. */ + +typedef struct GTY(()) dwarf_sect_used_entry { + const char *name; + unsigned count; +} +dwarf_sect_used_entry; + + +/* A list of used __DWARF sections. */ +static GTY (()) vec *dwarf_sect_names_table; + +/* This is called when we are asked to assemble a named section and the + name begins with __DWARF,. We keep a list of the section names (without + the __DWARF, prefix) and use this to emit our required start label on the + first switch to each section. */ + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, + tree ARG_UNUSED (decl)) +{ + unsigned i; + int namelen; + const char * sname; + dwarf_sect_used_entry *ref; + bool found = false; + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + /* We know that the name starts with __DWARF, */ + sname = name + 8; + namelen = strchr (sname, ',') - sname; + gcc_assert (namelen); + if (dwarf_sect_names_table == NULL) + vec_alloc (dwarf_sect_names_table, 16); + else + for (i = 0; + dwarf_sect_names_table->iterate (i, &ref); + i++) + { + if (!ref) + break; + if (!strcmp (ref->name, sname)) + { + found = true; + ref->count++; + break; + } + } + + fprintf (asm_out_file, "\t.section %s\n", name); + if (!found) + { + dwarf_sect_used_entry e; + fprintf (asm_out_file, "Lsection%.*s:\n", namelen, sname); + e.count = 1; + e.name = xstrdup (sname); + vec_safe_push (dwarf_sect_names_table, e); + } +} + +/* Output a difference of two labels that will be an assembly time + constant if the two labels are local. (.long lab1-lab2 will be + very different if lab1 is at the boundary between two sections; it + will be relocated according to the second section, not the first, + so one ends up with a difference between labels in different + sections, which is bad in the dwarf2 eh context for instance.) 
*/ + +static int darwin_dwarf_label_counter; + +void +darwin_asm_output_dwarf_delta (FILE *file, int size, + const char *lab1, const char *lab2) +{ + int islocaldiff = (lab1[0] == '*' && lab1[1] == 'L' + && lab2[0] == '*' && lab2[1] == 'L'); + const char *directive = (size == 8 ? ".quad" : ".long"); + + if (islocaldiff) + fprintf (file, "\t.set L$set$%d,", darwin_dwarf_label_counter); + else + fprintf (file, "\t%s\t", directive); + + assemble_name_raw (file, lab1); + fprintf (file, "-"); + assemble_name_raw (file, lab2); + if (islocaldiff) + fprintf (file, "\n\t%s L$set$%d", directive, darwin_dwarf_label_counter++); +} + +/* Output an offset in a DWARF section on Darwin. On Darwin, DWARF section + offsets are not represented using relocs in .o files; either the + section never leaves the .o file, or the linker or other tool is + responsible for parsing the DWARF and updating the offsets. */ + +void +darwin_asm_output_dwarf_offset (FILE *file, int size, const char * lab, + section *base) +{ + char sname[64]; + int namelen; + + gcc_assert (base->common.flags & SECTION_NAMED); + gcc_assert (strncmp (base->named.name, "__DWARF,", 8) == 0); + gcc_assert (strchr (base->named.name + 8, ',')); + + namelen = strchr (base->named.name + 8, ',') - (base->named.name + 8); + sprintf (sname, "*Lsection%.*s", namelen, base->named.name + 8); + darwin_asm_output_dwarf_delta (file, size, lab, sname); +} + +/* Called from the within the TARGET_ASM_FILE_START for each target. */ + +void +darwin_file_start (void) +{ + /* Nothing to do. */ +} + +/* Called for the TARGET_ASM_FILE_END hook. + Emit the mach-o pic indirection data, the lto data and, finally a flag + to tell the linker that it can break the file object into sections and + move those around for efficiency. */ + +void +darwin_file_end (void) +{ + if (!vec_safe_is_empty (ctors)) + finalize_ctors (); + if (!vec_safe_is_empty (dtors)) + finalize_dtors (); + + /* If we are expecting to output NeXT ObjC meta-data, (and we actually see + some) then we output the fix-and-continue marker (Image Info). + This applies to Objective C, Objective C++ and LTO with either language + as part of the input. */ + if (flag_next_runtime && objc_metadata_seen) + { + unsigned int flags = 0; + if (flag_objc_abi >= 2) + { + flags = 16; + output_section_asm_op + (darwin_sections[objc2_image_info_section]->unnamed.data); + } + else + output_section_asm_op + (darwin_sections[objc_image_info_section]->unnamed.data); + + ASM_OUTPUT_ALIGN (asm_out_file, 2); + fputs ("L_OBJC_ImageInfo:\n", asm_out_file); + + flags |= (flag_replace_objc_classes && classes_seen) ? 1 : 0; + flags |= flag_objc_gc ? 2 : 0; + + fprintf (asm_out_file, "\t.long\t0\n\t.long\t%u\n", flags); + } + + machopic_finish (asm_out_file); + if (strcmp (lang_hooks.name, "GNU C++") == 0) + { + switch_to_section (darwin_sections[constructor_section]); + switch_to_section (darwin_sections[destructor_section]); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + } + + /* If there was LTO assembler output, append it to asm_out_file. */ + if (lto_asm_out_name) + { + int n; + char *buf, *lto_asm_txt; + + /* Shouldn't be here if we failed to switch back. */ + gcc_assert (! 
saved_asm_out_file); + + lto_asm_out_file = fopen (lto_asm_out_name, "r"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s with LTO output", + lto_asm_out_name); + fseek (lto_asm_out_file, 0, SEEK_END); + n = ftell (lto_asm_out_file); + if (n > 0) + { + fseek (lto_asm_out_file, 0, SEEK_SET); + lto_asm_txt = buf = (char *) xmalloc (n + 1); + while (fgets (lto_asm_txt, n, lto_asm_out_file)) + fputs (lto_asm_txt, asm_out_file); + /* Put a termination label. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# end of lto\n", + lto_section_num); + /* Make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + } + + /* Remove the temporary file. */ + fclose (lto_asm_out_file); + unlink_if_ordinary (lto_asm_out_name); + free (lto_asm_out_name); + } + + /* Output the names and indices. */ + if (lto_section_names && lto_section_names->length ()) + { + int count; + darwin_lto_section_e *ref; + /* For now, we'll make the offsets 4 bytes and unaligned - we'll fix + the latter up ourselves. */ + const char *op = integer_asm_op (4,0); + + /* Emit the names. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_NAMES_SECTION); + FOR_EACH_VEC_ELT (*lto_section_names, count, ref) + { + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\n", count); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, + "\t.set L$gnu$lto$noff%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME0\n", + count, count); + fprintf (asm_out_file, + "\t.set L$gnu$lto$nsiz%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME%d\n", + count, count+1, count); + fprintf (asm_out_file, "\t.asciz\t\"%s\"\n", ref->sectname); + } + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\t;# end\n", lto_section_num); + /* make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + + /* Emit the Index. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_INDEX_SECTION); + fputs ("\t.align\t2\n", asm_out_file); + fputs ("# Section offset, Section length, Name offset, Name length\n", + asm_out_file); + FOR_EACH_VEC_ELT (*lto_section_names, count, ref) + { + fprintf (asm_out_file, "%s L$gnu$lto$offs%d\t;# %s\n", + op, count, ref->sectname); + fprintf (asm_out_file, "%s L$gnu$lto$size%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$noff%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$nsiz%d\n", op, count); + } + } + + /* If we have section anchors, then we must prevent the linker from + re-arranging data. */ + if (!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + fprintf (asm_out_file, "\t.subsections_via_symbols\n"); +} + +/* TODO: Add a language hook for identifying if a decl is a vtable. */ +#define DARWIN_VTABLE_P(DECL) 0 + +/* Cross-module name binding. Darwin does not support overriding + functions at dynamic-link time, except for vtables in kexts. */ + +bool +darwin_binds_local_p (const_tree decl) +{ + return default_binds_local_p_1 (decl, + TARGET_KEXTABI && DARWIN_VTABLE_P (decl)); +} + +/* The Darwin's implementation of TARGET_ASM_OUTPUT_ANCHOR. Define the + anchor relative to ".", the current section position. We cannot use + the default one because ASM_OUTPUT_DEF is wrong for Darwin. 
*/ +void +darwin_asm_output_anchor (rtx symbol) +{ + fprintf (asm_out_file, "\t.set\t"); + assemble_name (asm_out_file, XSTR (symbol, 0)); + fprintf (asm_out_file, ", . + " HOST_WIDE_INT_PRINT_DEC "\n", + SYMBOL_REF_BLOCK_OFFSET (symbol)); +} + +/* Disable section anchoring on any section containing a zero-sized + object. */ +bool +darwin_use_anchors_for_symbol_p (const_rtx symbol) +{ + if (DARWIN_SECTION_ANCHORS && flag_section_anchors) + { + section *sect; + /* If the section contains a zero-sized object it's ineligible. */ + sect = SYMBOL_REF_BLOCK (symbol)->sect; + /* This should have the effect of disabling anchors for vars that follow + any zero-sized one, in a given section. */ + if (sect->common.flags & SECTION_NO_ANCHOR) + return false; + + /* Also check the normal reasons for suppressing. */ + return default_use_anchors_for_symbol_p (symbol); + } + else + return false; +} + +/* Set the darwin specific attributes on TYPE. */ +void +darwin_set_default_type_attributes (tree type) +{ + if (darwin_ms_struct + && TREE_CODE (type) == RECORD_TYPE) + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("ms_struct"), + NULL_TREE, + TYPE_ATTRIBUTES (type)); +} + +/* True, iff we're generating code for loadable kernel extensions. */ + +bool +darwin_kextabi_p (void) { + return flag_apple_kext; +} + +void +darwin_override_options (void) +{ + /* Keep track of which (major) version we're generating code for. */ + if (darwin_macosx_version_min) + { + if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) + generating_for_darwin_version = 10; + else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0) + generating_for_darwin_version = 9; + + /* Earlier versions are not specifically accounted, until required. */ + } + + /* In principle, this should be c-family only. However, we really need to + set sensible defaults for LTO as well, since the section selection stuff + should check for correctness re. the ABI. TODO: check and provide the + flags (runtime & ABI) from the lto wrapper). */ + + /* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */ + if (!global_options_set.x_flag_objc_abi) + global_options.x_flag_objc_abi + = (!flag_next_runtime) + ? 0 + : (TARGET_64BIT ? 2 + : (generating_for_darwin_version >= 9) ? 1 + : 0); + + /* Objective-C family ABI 2 is only valid for next/m64 at present. */ + if (global_options_set.x_flag_objc_abi && flag_next_runtime) + { + if (TARGET_64BIT && global_options.x_flag_objc_abi < 2) + error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 must be" + " used for %<-m64%> targets with" + " %<-fnext-runtime%>"); + if (!TARGET_64BIT && global_options.x_flag_objc_abi >= 2) + error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 is not" + " supported on %<-m32%> targets with" + " %<-fnext-runtime%>"); + } + + /* Don't emit DWARF3/4 unless specifically selected. This is a + workaround for tool bugs. */ + if (!global_options_set.x_dwarf_strict) + dwarf_strict = 1; + if (!global_options_set.x_dwarf_version) + dwarf_version = 2; + + /* Do not allow unwind tables to be generated by default for m32. + fnon-call-exceptions will override this, regardless of what we do. */ + if (generating_for_darwin_version < 10 + && !global_options_set.x_flag_asynchronous_unwind_tables + && !TARGET_64BIT) + global_options.x_flag_asynchronous_unwind_tables = 0; + + /* Disable -freorder-blocks-and-partition when unwind tables are being + emitted for Darwin < 9 (OSX 10.5). 
+ The strategy is, "Unless the User has specifically set/unset an unwind + flag we will switch off -freorder-blocks-and-partition when unwind tables + will be generated". If the User specifically sets flags... we assume + (s)he knows why... */ + if (generating_for_darwin_version < 9 + && global_options_set.x_flag_reorder_blocks_and_partition + && ((global_options.x_flag_exceptions /* User, c++, java */ + && !global_options_set.x_flag_exceptions) /* User specified... */ + || (global_options.x_flag_unwind_tables + && !global_options_set.x_flag_unwind_tables) + || (global_options.x_flag_non_call_exceptions + && !global_options_set.x_flag_non_call_exceptions) + || (global_options.x_flag_asynchronous_unwind_tables + && !global_options_set.x_flag_asynchronous_unwind_tables))) + { + inform (input_location, + "-freorder-blocks-and-partition does not work with exceptions " + "on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + /* FIXME: flag_objc_sjlj_exceptions is no longer needed since there is only + one valid choice of exception scheme for each runtime. */ + if (!global_options_set.x_flag_objc_sjlj_exceptions) + global_options.x_flag_objc_sjlj_exceptions = + flag_next_runtime && !TARGET_64BIT; + + /* FIXME: and this could be eliminated then too. */ + if (!global_options_set.x_flag_exceptions + && flag_objc_exceptions + && TARGET_64BIT) + flag_exceptions = 1; + + if (flag_mkernel || flag_apple_kext) + { + /* -mkernel implies -fapple-kext for C++ */ + if (strcmp (lang_hooks.name, "GNU C++") == 0) + flag_apple_kext = 1; + + flag_no_common = 1; + + /* No EH in kexts. */ + flag_exceptions = 0; + /* No -fnon-call-exceptions data in kexts. */ + flag_non_call_exceptions = 0; + /* so no tables either.. */ + flag_unwind_tables = 0; + flag_asynchronous_unwind_tables = 0; + /* We still need to emit branch islands for kernel context. */ + darwin_emit_branch_islands = true; + } + + if (flag_var_tracking_uninit == 0 + && generating_for_darwin_version >= 9 + && (flag_gtoggle ? (debug_info_level == DINFO_LEVEL_NONE) + : (debug_info_level >= DINFO_LEVEL_NORMAL)) + && write_symbols == DWARF2_DEBUG) + flag_var_tracking_uninit = flag_var_tracking; + + if (MACHO_DYNAMIC_NO_PIC_P) + { + if (flag_pic) + warning_at (UNKNOWN_LOCATION, 0, + "%<-mdynamic-no-pic%> overrides %<-fpic%>, %<-fPIC%>," + " %<-fpie%> or %<-fPIE%>"); + flag_pic = 0; + } + else if (flag_pic == 1) + { + /* Darwin's -fpic is -fPIC. */ + flag_pic = 2; + } + + /* It is assumed that branch island stubs are needed for earlier systems. */ + if (generating_for_darwin_version < 9) + darwin_emit_branch_islands = true; + else + emit_aligned_common = true; /* Later systems can support aligned common. */ + + /* The c_dialect...() macros are not available to us here. */ + darwin_running_cxx = (strstr (lang_hooks.name, "C++") != 0); +} + +#if DARWIN_PPC +/* Add $LDBL128 suffix to long double builtins for ppc darwin. 
*/ + +static void +darwin_patch_builtin (enum built_in_function fncode) +{ + tree fn = builtin_decl_explicit (fncode); + tree sym; + char *newname; + + if (!fn) + return; + + sym = DECL_ASSEMBLER_NAME (fn); + newname = ACONCAT (("_", IDENTIFIER_POINTER (sym), "$LDBL128", NULL)); + + set_user_assembler_name (fn, newname); + + fn = builtin_decl_implicit (fncode); + if (fn) + set_user_assembler_name (fn, newname); +} + +void +darwin_patch_builtins (void) +{ + if (LONG_DOUBLE_TYPE_SIZE != 128) + return; + +#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_NO64(fncode) \ + if (!TARGET_64BIT) \ + darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_VARIADIC(fncode) \ + if (!TARGET_64BIT \ + && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \ + darwin_patch_builtin (fncode); +#include "darwin-ppc-ldouble-patch.def" +#undef PATCH_BUILTIN +#undef PATCH_BUILTIN_NO64 +#undef PATCH_BUILTIN_VARIADIC +} +#endif + +/* CFStrings implementation. */ +static GTY(()) tree cfstring_class_reference = NULL_TREE; +static GTY(()) tree cfstring_type_node = NULL_TREE; +static GTY(()) tree ccfstring_type_node = NULL_TREE; +static GTY(()) tree pccfstring_type_node = NULL_TREE; +static GTY(()) tree pcint_type_node = NULL_TREE; +static GTY(()) tree pcchar_type_node = NULL_TREE; + +static enum built_in_function darwin_builtin_cfstring; + +/* Store all constructed constant CFStrings in a hash table so that + they get uniqued properly. */ + +typedef struct GTY (()) cfstring_descriptor { + /* The string literal. */ + tree literal; + /* The resulting constant CFString. */ + tree constructor; +} cfstring_descriptor; + +static GTY ((param_is (struct cfstring_descriptor))) htab_t cfstring_htab; + +static hashval_t cfstring_hash (const void *); +static int cfstring_eq (const void *, const void *); + +static tree +add_builtin_field_decl (tree type, const char *name, tree **chain) +{ + tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier (name), type); + + if (*chain != NULL) + **chain = field; + *chain = &DECL_CHAIN (field); + + return field; +} + +tree +darwin_init_cfstring_builtins (unsigned builtin_cfstring) +{ + tree cfsfun, fields, pccfstring_ftype_pcchar; + tree *chain = NULL; + + darwin_builtin_cfstring = + (enum built_in_function) builtin_cfstring; + + /* struct __builtin_CFString { + const int *isa; (will point at + int flags; __CFConstantStringClassReference) + const char *str; + long length; + }; */ + + pcint_type_node = build_pointer_type + (build_qualified_type (integer_type_node, TYPE_QUAL_CONST)); + + pcchar_type_node = build_pointer_type + (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); + + cfstring_type_node = (*lang_hooks.types.make_type) (RECORD_TYPE); + + /* Have to build backwards for finish struct. 
*/ + fields = add_builtin_field_decl (long_integer_type_node, "length", &chain); + add_builtin_field_decl (pcchar_type_node, "str", &chain); + add_builtin_field_decl (integer_type_node, "flags", &chain); + add_builtin_field_decl (pcint_type_node, "isa", &chain); + finish_builtin_struct (cfstring_type_node, "__builtin_CFString", + fields, NULL_TREE); + + /* const struct __builtin_CFstring * + __builtin___CFStringMakeConstantString (const char *); */ + + ccfstring_type_node = build_qualified_type + (cfstring_type_node, TYPE_QUAL_CONST); + pccfstring_type_node = build_pointer_type (ccfstring_type_node); + pccfstring_ftype_pcchar = build_function_type_list + (pccfstring_type_node, pcchar_type_node, NULL_TREE); + + cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__builtin___CFStringMakeConstantString"), + pccfstring_ftype_pcchar); + + TREE_PUBLIC (cfsfun) = 1; + DECL_EXTERNAL (cfsfun) = 1; + DECL_ARTIFICIAL (cfsfun) = 1; + /* Make a lang-specific section - dup_lang_specific_decl makes a new node + in place of the existing, which may be NULL. */ + DECL_LANG_SPECIFIC (cfsfun) = NULL; + (*lang_hooks.dup_lang_specific_decl) (cfsfun); + DECL_BUILT_IN_CLASS (cfsfun) = BUILT_IN_MD; + DECL_FUNCTION_CODE (cfsfun) = darwin_builtin_cfstring; + lang_hooks.builtin_function (cfsfun); + + /* extern int __CFConstantStringClassReference[]; */ + cfstring_class_reference = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__CFConstantStringClassReference"), + build_array_type (integer_type_node, NULL_TREE)); + + TREE_PUBLIC (cfstring_class_reference) = 1; + DECL_ARTIFICIAL (cfstring_class_reference) = 1; + (*lang_hooks.decls.pushdecl) (cfstring_class_reference); + DECL_EXTERNAL (cfstring_class_reference) = 1; + rest_of_decl_compilation (cfstring_class_reference, 0, 0); + + /* Initialize the hash table used to hold the constant CFString objects. */ + cfstring_htab = htab_create_ggc (31, cfstring_hash, cfstring_eq, NULL); + + return cfstring_type_node; +} + +tree +darwin_fold_builtin (tree fndecl, int n_args, tree *argp, + bool ARG_UNUSED (ignore)) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode == darwin_builtin_cfstring) + { + if (!darwin_constant_cfstrings) + { + error ("built-in function %qD requires the" + " %<-mconstant-cfstrings%> flag", fndecl); + return error_mark_node; + } + + if (n_args != 1) + { + error ("built-in function %qD takes one argument only", fndecl); + return error_mark_node; + } + + return darwin_build_constant_cfstring (*argp); + } + + return NULL_TREE; +} + +void +darwin_rename_builtins (void) +{ + /* The system ___divdc3 routine in libSystem on darwin10 is not + accurate to 1ulp, ours is, so we avoid ever using the system name + for this routine and instead install a non-conflicting name that + is accurate. + + When -ffast-math or -funsafe-math-optimizations is given, we can + use the faster version. */ + if (!flag_unsafe_math_optimizations) + { + enum built_in_function dcode + = (enum built_in_function)(BUILT_IN_COMPLEX_DIV_MIN + + DCmode - MIN_MODE_COMPLEX_FLOAT); + tree fn = builtin_decl_explicit (dcode); + /* Fortran and c call TARGET_INIT_BUILTINS and + TARGET_INIT_LIBFUNCS at different times, so we have to put a + call into each to ensure that at least one of them is called + after build_common_builtin_nodes. A better fix is to add a + new hook to run after build_common_builtin_nodes runs. 
*/ + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + fn = builtin_decl_implicit (dcode); + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + } +} + +bool +darwin_libc_has_function (enum function_class fn_class) +{ + if (fn_class == function_sincos) + return false; + if (fn_class == function_c99_math_complex + || fn_class == function_c99_misc) + return (TARGET_64BIT + || strverscmp (darwin_macosx_version_min, "10.3") >= 0); + + return true; +} + +static hashval_t +cfstring_hash (const void *ptr) +{ + tree str = ((const struct cfstring_descriptor *)ptr)->literal; + const unsigned char *p = (const unsigned char *) TREE_STRING_POINTER (str); + int i, len = TREE_STRING_LENGTH (str); + hashval_t h = len; + + for (i = 0; i < len; i++) + h = ((h * 613) + p[i]); + + return h; +} + +static int +cfstring_eq (const void *ptr1, const void *ptr2) +{ + tree str1 = ((const struct cfstring_descriptor *)ptr1)->literal; + tree str2 = ((const struct cfstring_descriptor *)ptr2)->literal; + int len1 = TREE_STRING_LENGTH (str1); + + return (len1 == TREE_STRING_LENGTH (str2) + && !memcmp (TREE_STRING_POINTER (str1), TREE_STRING_POINTER (str2), + len1)); +} + +tree +darwin_build_constant_cfstring (tree str) +{ + struct cfstring_descriptor *desc, key; + void **loc; + tree addr; + + if (!str) + { + error ("CFString literal is missing"); + return error_mark_node; + } + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + { + error ("CFString literal expression is not a string constant"); + return error_mark_node; + } + + /* Perhaps we already constructed a constant CFString just like this one? */ + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + desc = (struct cfstring_descriptor *) *loc; + + if (!desc) + { + tree var, constructor, field; + vec *v = NULL; + int length = TREE_STRING_LENGTH (str) - 1; + + if (darwin_warn_nonportable_cfstrings) + { + const char *s = TREE_STRING_POINTER (str); + int l = 0; + + for (l = 0; l < length; l++) + if (!s[l] || !isascii (s[l])) + { + warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal", + s[l] ? "non-ASCII character" : "embedded NUL"); + break; + } + } + + *loc = desc = ggc_alloc_cleared_cfstring_descriptor (); + desc->literal = str; + + /* isa *. */ + field = TYPE_FIELDS (ccfstring_type_node); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), + cfstring_class_reference)); + /* flags */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), 0x000007c8)); + /* string *. */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), str)); + /* length */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), length)); + + constructor = build_constructor (ccfstring_type_node, v); + TREE_READONLY (constructor) = 1; + TREE_CONSTANT (constructor) = 1; + TREE_STATIC (constructor) = 1; + + /* Fromage: The C++ flavor of 'build_unary_op' expects constructor nodes + to have the TREE_HAS_CONSTRUCTOR (...) bit set. However, this file is + being built without any knowledge of C++ tree accessors; hence, we shall + use the generic accessor that TREE_HAS_CONSTRUCTOR actually maps to! */ + if (darwin_running_cxx) + TREE_LANG_FLAG_4 (constructor) = 1; /* TREE_HAS_CONSTRUCTOR */ + + /* Create an anonymous global variable for this CFString. 
*/ + var = build_decl (input_location, CONST_DECL, + NULL, TREE_TYPE (constructor)); + DECL_ARTIFICIAL (var) = 1; + TREE_STATIC (var) = 1; + DECL_INITIAL (var) = constructor; + /* FIXME: This should use a translation_unit_decl to indicate file scope. */ + DECL_CONTEXT (var) = NULL_TREE; + desc->constructor = var; + } + + addr = build1 (ADDR_EXPR, pccfstring_type_node, desc->constructor); + TREE_CONSTANT (addr) = 1; + + return addr; +} + +bool +darwin_cfstring_p (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + if (!str) + return false; + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + return false; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, NO_INSERT); + + if (loc) + return true; + + return false; +} + +void +darwin_enter_string_into_cfstring_table (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + + if (!*loc) + { + *loc = ggc_alloc_cleared_cfstring_descriptor (); + ((struct cfstring_descriptor *)*loc)->literal = str; + } +} + +/* Choose named function section based on its frequency. */ + +section * +darwin_function_section (tree decl, enum node_frequency freq, + bool startup, bool exit) +{ + /* Decide if we need to put this in a coalescable section. */ + bool weak = (decl + && DECL_WEAK (decl) + && (!DECL_ATTRIBUTES (decl) + || !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl)))); + + /* If there is a specified section name, we should not be trying to + override. */ + if (decl && DECL_SECTION_NAME (decl) != NULL_TREE) + return get_named_section (decl, NULL, 0); + + /* We always put unlikely executed stuff in the cold section. */ + if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED) + return (weak) ? darwin_sections[text_cold_coal_section] + : darwin_sections[text_cold_section]; + + /* If we have LTO *and* feedback information, then let LTO handle + the function ordering, it makes a better job (for normal, hot, + startup and exit - hence the bailout for cold above). */ + if (in_lto_p && flag_profile_values) + goto default_function_sections; + + /* Non-cold startup code should go to startup subsection. */ + if (startup) + return (weak) ? darwin_sections[text_startup_coal_section] + : darwin_sections[text_startup_section]; + + /* Similarly for exit. */ + if (exit) + return (weak) ? darwin_sections[text_exit_coal_section] + : darwin_sections[text_exit_section]; + + /* Place hot code. */ + if (freq == NODE_FREQUENCY_HOT) + return (weak) ? darwin_sections[text_hot_coal_section] + : darwin_sections[text_hot_section]; + + /* Otherwise, default to the 'normal' non-reordered sections. */ +default_function_sections: + return (weak) ? darwin_sections[text_coal_section] + : text_section; +} + +/* When a function is partitioned between sections, we need to insert a label + at the start of each new chunk - so that it may become a valid 'atom' for + eh and debug purposes. Without this the linker will emit warnings if one + tries to add line location information (since the switched fragment will + be anonymous). */ + +void +darwin_function_switched_text_sections (FILE *fp, tree decl, bool new_is_cold) +{ + char buf[128]; + snprintf (buf, 128, "%s%s",new_is_cold?"__cold_sect_of_":"__hot_sect_of_", + IDENTIFIER_POINTER (DECL_NAME (decl))); + /* Make sure we pick up all the relevant quotes etc. 
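(Illustrative sketch, not part of the patch.)  Seen from the user side, the constant-CFString machinery above works like this: assuming a Darwin-hosted compiler with the default -mconstant-cfstrings, a call to the builtin registered in darwin_init_builtins is folded at compile time by darwin_fold_builtin / darwin_build_constant_cfstring into the address of a static four-field descriptor (isa, flags, string, length), and identical literals share a single descriptor via the cfstring_htab hash table:

    int
    main (void)
    {
      /* No code runs here at run time; the call folds to the address of a
         static __builtin_CFString object emitted once per distinct literal.  */
      const void *s = __builtin___CFStringMakeConstantString ("hello");
      return s == 0;
    }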
*/ + assemble_name_raw (fp, (const char *) buf); + fputs (":\n", fp); +} + +#include "gt-darwin.h" diff --git a/gcc-4.9/gcc/config/darwin.h b/gcc-4.9/gcc/config/darwin.h new file mode 100644 index 000000000..126364099 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.h @@ -0,0 +1,921 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 1989-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef CONFIG_DARWIN_H +#define CONFIG_DARWIN_H + +/* The definitions in this file are common to all processor types + running Darwin, which is the kernel for Mac OS X. Darwin is + basically a BSD user layer laid over a Mach kernel, then evolved + for many years (at NeXT) in parallel with other Unix systems. So + while the runtime is a somewhat idiosyncratic Mach-based thing, + other definitions look like they would for a BSD variant. */ + +/* Although NeXT ran on many different architectures, as of Jan 2001 + the only supported Darwin targets are PowerPC and x86. */ + +/* One of Darwin's NeXT legacies is the Mach-O format, which is partly + like a.out and partly like COFF, with additional features like + multi-architecture binary support. */ + +#define DARWIN_X86 0 +#define DARWIN_PPC 0 + +/* Don't assume anything about the header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* Suppress g++ attempt to link in the math library automatically. */ +#define MATH_LIBRARY "" + +/* We have atexit. */ + +#define HAVE_ATEXIT + +/* Define an empty body for the function do_global_dtors() in libgcc2.c. */ + +#define DO_GLOBAL_DTORS_BODY + +/* The string value for __SIZE_TYPE__. */ + +#ifndef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" +#endif + +/* Type used for ptrdiff_t, as a string used in a declaration. */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* wchar_t is int. 
*/ + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define INT8_TYPE "signed char" +#define INT16_TYPE "short int" +#define INT32_TYPE "int" +#define INT64_TYPE "long long int" +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE "short unsigned int" +#define UINT32_TYPE "unsigned int" +#define UINT64_TYPE "long long unsigned int" + +#define INT_LEAST8_TYPE "signed char" +#define INT_LEAST16_TYPE "short int" +#define INT_LEAST32_TYPE "int" +#define INT_LEAST64_TYPE "long long int" +#define UINT_LEAST8_TYPE "unsigned char" +#define UINT_LEAST16_TYPE "short unsigned int" +#define UINT_LEAST32_TYPE "unsigned int" +#define UINT_LEAST64_TYPE "long long unsigned int" + +#define INT_FAST8_TYPE "signed char" +#define INT_FAST16_TYPE "short int" +#define INT_FAST32_TYPE "int" +#define INT_FAST64_TYPE "long long int" +#define UINT_FAST8_TYPE "unsigned char" +#define UINT_FAST16_TYPE "short unsigned int" +#define UINT_FAST32_TYPE "unsigned int" +#define UINT_FAST64_TYPE "long long unsigned int" + +#define INTPTR_TYPE "long int" +#define UINTPTR_TYPE "long unsigned int" + +#define SIG_ATOMIC_TYPE "int" + +/* Default to using the NeXT-style runtime, since that's what is + pre-installed on Darwin systems. */ + +#define NEXT_OBJC_RUNTIME 1 + +/* Don't default to pcc-struct-return, because gcc is the only compiler, and + we want to retain compatibility with older gcc versions. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* True if pragma ms_struct is in effect. */ +extern GTY(()) int darwin_ms_struct; + +#define DRIVER_SELF_SPECS \ + "%{gfull:-g -fno-eliminate-unused-debug-symbols} % 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc ; \ + :%:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc }" + +/* We specify crt0.o as -lcrt0.o so that ld will search the library path. + + crt3.o provides __cxa_atexit on systems that don't have it. Since + it's only used with C++, which requires passing -shared-libgcc, key + off that to avoid unnecessarily adding a destructor to every + powerpc program built. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{Zdynamiclib: %(darwin_dylib1) %{fgnu-tm: -lcrttms.o}} \ + %{!Zdynamiclib:%{Zbundle:%{!static: \ + %:version-compare(< 10.6 mmacosx-version-min= -lbundle1.o) \ + %{fgnu-tm: -lcrttms.o}}} \ + %{!Zbundle:%{pg:%{static:-lgcrt0.o} \ + %{!static:%{object:-lgcrt0.o} \ + %{!object:%{preload:-lgcrt0.o} \ + %{!preload:-lgcrt1.o \ + %:version-compare(>= 10.8 mmacosx-version-min= -no_new_main) \ + %(darwin_crt2)}}}} \ + %{!pg:%{static:-lcrt0.o} \ + %{!static:%{object:-lcrt0.o} \ + %{!object:%{preload:-lcrt0.o} \ + %{!preload: %(darwin_crt1) \ + %(darwin_crt2)}}}}}} \ + %{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}" + +/* We want a destructor last in the list. 
*/ +#define TM_DESTRUCTOR "%{fgnu-tm: -lcrttme.o}" +#define ENDFILE_SPEC TM_DESTRUCTOR + +#define DARWIN_EXTRA_SPECS \ + { "darwin_crt1", DARWIN_CRT1_SPEC }, \ + { "darwin_dylib1", DARWIN_DYLIB1_SPEC }, \ + { "darwin_minversion", DARWIN_MINVERSION_SPEC }, + +#define DARWIN_DYLIB1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)" + +#define DARWIN_CRT1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lcrt1.10.5.o) \ + %:version-compare(>< 10.6 10.8 mmacosx-version-min= -lcrt1.10.6.o) \ + %{fgnu-tm: -lcrttms.o}" + +/* Default Darwin ASM_SPEC, very simple. */ +#define ASM_SPEC "-arch %(darwin_arch) \ + %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \ + %{static}" + +/* Default ASM_DEBUG_SPEC. Darwin's as cannot currently produce dwarf + debugging data. */ + +#define ASM_DEBUG_SPEC "%{g*:%{!g0:%{!gdwarf*:--gstabs}}}" + +/* We still allow output of STABS. */ + +#define DBX_DEBUGGING_INFO 1 + +#define DWARF2_DEBUGGING_INFO 1 +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#define DEBUG_FRAME_SECTION "__DWARF,__debug_frame,regular,debug" +#define DEBUG_INFO_SECTION "__DWARF,__debug_info,regular,debug" +#define DEBUG_ABBREV_SECTION "__DWARF,__debug_abbrev,regular,debug" +#define DEBUG_ARANGES_SECTION "__DWARF,__debug_aranges,regular,debug" +#define DEBUG_MACINFO_SECTION "__DWARF,__debug_macinfo,regular,debug" +#define DEBUG_LINE_SECTION "__DWARF,__debug_line,regular,debug" +#define DEBUG_LOC_SECTION "__DWARF,__debug_loc,regular,debug" +#define DEBUG_PUBNAMES_SECTION "__DWARF,__debug_pubnames,regular,debug" +#define DEBUG_PUBTYPES_SECTION "__DWARF,__debug_pubtypes,regular,debug" +#define DEBUG_STR_SECTION "__DWARF,__debug_str,regular,debug" +#define DEBUG_RANGES_SECTION "__DWARF,__debug_ranges,regular,debug" +#define DEBUG_MACRO_SECTION "__DWARF,__debug_macro,regular,debug" + +#define TARGET_WANT_DEBUG_PUB_SECTIONS true + +#define TARGET_FORCE_AT_COMP_DIR true + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#define DBX_CONTIN_LENGTH 0 + +/* gdb needs a null N_SO at the end of each file for scattered loading. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* GCC's definition of 'one_only' is the same as its definition of 'weak'. */ +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Mach-O supports 'weak imports', and 'weak definitions' in coalesced + sections. machopic_select_section ensures that weak variables go in + coalesced sections. Weak aliases (or any other kind of aliases) are + not supported. Weak symbols that aren't visible outside the .s file + are not supported. */ +#define ASM_WEAKEN_DECL(FILE, DECL, NAME, ALIAS) \ + do { \ + if (ALIAS) \ + { \ + warning (0, "alias definitions not supported in Mach-O; ignored"); \ + break; \ + } \ + \ + if (! DECL_EXTERNAL (DECL) && TREE_PUBLIC (DECL)) \ + targetm.asm_out.globalize_label (FILE, NAME); \ + if (DECL_EXTERNAL (DECL)) \ + fputs ("\t.weak_reference ", FILE); \ + else if (lookup_attribute ("weak_import", DECL_ATTRIBUTES (DECL))) \ + break; \ + else if (TREE_PUBLIC (DECL)) \ + fputs ("\t.weak_definition ", FILE); \ + else \ + break; \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + } while (0) + +/* Darwin has the pthread routines in libSystem, which every program + links to, so there's no need for weak-ness for that. 
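(Illustrative sketch, not part of the patch.)  The ASM_WEAKEN_DECL macro above distinguishes weak references from weak definitions, since Mach-O has separate directives for the two.  For a translation unit such as

    int __attribute__ ((weak)) counter = 0;          /* public weak definition  */
    extern void hook (void) __attribute__ ((weak));  /* external weak reference */
    void maybe_hook (void) { if (hook) hook (); }

the macro would emit roughly ".globl _counter" followed by ".weak_definition _counter" for the definition, and ".weak_reference _hook" for the reference; the exact output depends on USER_LABEL_PREFIX and on how the declarations are actually used.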
*/ +#define GTHREAD_USE_WEAK 0 + +/* The Darwin linker doesn't want coalesced symbols to appear in + a static archive's table of contents. */ +#undef TARGET_WEAK_NOT_IN_ARCHIVE_TOC +#define TARGET_WEAK_NOT_IN_ARCHIVE_TOC 1 + +/* On Darwin, we don't (at the time of writing) have linkonce sections + with names, so it's safe to make the class data not comdat. */ +#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT hook_bool_void_false + +/* For efficiency, on Darwin the RTTI information that is always + emitted in the standard C++ library should not be COMDAT. */ +#define TARGET_CXX_LIBRARY_RTTI_COMDAT hook_bool_void_false + +/* We make exception information linkonce. */ +#undef TARGET_USES_WEAK_UNWIND_INFO +#define TARGET_USES_WEAK_UNWIND_INFO 1 + +/* We need to use a nonlocal label for the start of an EH frame: the + Darwin linker requires that a coalesced section start with a label. + Unfortunately, it also requires that 'debug' sections don't contain + labels. */ +#undef FRAME_BEGIN_LABEL +#define FRAME_BEGIN_LABEL (for_eh ? "EH_frame" : "Lframe") + +/* Emit a label for the FDE corresponding to DECL. EMPTY means + emit a label for an empty FDE. */ +#define TARGET_ASM_EMIT_UNWIND_LABEL darwin_emit_unwind_label + +/* Emit a label to separate the exception table. */ +#define TARGET_ASM_EMIT_EXCEPT_TABLE_LABEL darwin_emit_except_table_label + +/* Our profiling scheme doesn't LP labels and counter words. */ + +#define NO_PROFILE_COUNTERS 1 + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP + +#undef INVOKE__main + +#define TARGET_ASM_CONSTRUCTOR machopic_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR machopic_asm_out_destructor + +/* Always prefix with an underscore. */ + +#define USER_LABEL_PREFIX "_" + +/* A dummy symbol that will be replaced with the function base name. */ +#define MACHOPIC_FUNCTION_BASE_NAME "" + +/* Don't output a .file directive. That is only used by the assembler for + error reporting. */ +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END darwin_file_end + +/* Because Mach-O relocations have a counter from 1 to 255 for the + section number they apply to, it is necessary to output all + normal sections before the LTO sections, to make sure that the + sections that may have relocations always have a section number + smaller than 255. */ +#undef TARGET_ASM_LTO_START +#define TARGET_ASM_LTO_START darwin_asm_lto_start +#undef TARGET_ASM_LTO_END +#define TARGET_ASM_LTO_END darwin_asm_lto_end + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", SIZE) + +/* Give ObjC methods pretty symbol names. */ + +#undef OBJC_GEN_METHOD_LABEL +#define OBJC_GEN_METHOD_LABEL(BUF,IS_INST,CLASS_NAME,CAT_NAME,SEL_NAME,NUM) \ + do { if (CAT_NAME) \ + sprintf (BUF, "%c[%s(%s) %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (CAT_NAME), (SEL_NAME)); \ + else \ + sprintf (BUF, "%c[%s %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (SEL_NAME)); \ + } while (0) + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + darwin_asm_declare_object_name ((FILE), (NAME), (DECL)) + +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. 
*/ + +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do { \ + const char *xname = NAME; \ + if (GET_CODE (XEXP (DECL_RTL (DECL), 0)) != SYMBOL_REF) \ + xname = IDENTIFIER_POINTER (DECL_NAME (DECL)); \ + if (! DECL_WEAK (DECL) \ + && ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL))) \ + machopic_define_symbol (DECL_RTL (DECL)); \ + if ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL)) \ + (* targetm.encode_section_info) (DECL, DECL_RTL (DECL), false); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, xname, DECL); \ + } while (0) + +#undef TARGET_ASM_DECLARE_CONSTANT_NAME +#define TARGET_ASM_DECLARE_CONSTANT_NAME darwin_asm_declare_constant_name + +/* Wrap new method names in quotes so the assembler doesn't gag. + Make Objective-C internal symbols local and in doing this, we need + to accommodate the name mangling done by c++ on file scope locals. */ + +int darwin_label_is_anonymous_local_objc_name (const char *name); + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { \ + const char *xname = (NAME); \ + if (! strcmp (xname, MACHOPIC_FUNCTION_BASE_NAME)) \ + machopic_output_function_base_name(FILE); \ + else if (xname[0] == '&' || xname[0] == '*') \ + { \ + int len = strlen (xname); \ + if (len > 6 && !strcmp ("$stub", xname + len - 5)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 7 && !strcmp ("$stub\"", xname + len - 6)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 14 && !strcmp ("$non_lazy_ptr", xname + len - 13)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 15 && !strcmp ("$non_lazy_ptr\"", xname + len - 14)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + if (xname[1] != '"' && name_needs_quotes (&xname[1])) \ + fprintf (FILE, "\"%s\"", &xname[1]); \ + else \ + fputs (&xname[1], FILE); \ + } \ + else if (xname[0] == '+' || xname[0] == '-') \ + fprintf (FILE, "\"%s\"", xname); \ + else if (darwin_label_is_anonymous_local_objc_name (xname)) \ + fprintf (FILE, "L%s", xname); \ + else if (xname[0] != '"' && name_needs_quotes (xname)) \ + asm_fprintf (FILE, "\"%U%s\"", xname); \ + else \ + asm_fprintf (FILE, "%U%s", xname); \ + } while (0) + +/* Output before executable code. */ +#undef TEXT_SECTION_ASM_OP +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.data" + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP ".align" + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t%s\t%d\n", ALIGN_ASM_OP, (LOG)) + +/* The maximum alignment which the object file format can support in + bits. For Mach-O, this is 2^15 bytes. */ + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (0x8000 * 8) + +#define L2_MAX_OFILE_ALIGNMENT 15 + +/* These are the three variants that emit referenced blank space. 
*/ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_local \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_common \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +/* The generic version, archs should over-ride where required. */ +#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".non_lazy_symbol_pointer" + +/* Declare the section variables. */ +#ifndef USED_FOR_TARGET +enum darwin_section_enum { +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) NAME, +#include "darwin-sections.def" +#undef DEF_SECTION + NUM_DARWIN_SECTIONS +}; +extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS]; +#endif + +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION machopic_select_section + +#undef TARGET_ASM_FUNCTION_SECTION +#define TARGET_ASM_FUNCTION_SECTION darwin_function_section + +#undef TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS +#define TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS \ + darwin_function_switched_text_sections + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION machopic_select_rtx_section +#undef TARGET_ASM_UNIQUE_SECTION +#define TARGET_ASM_UNIQUE_SECTION darwin_unique_section +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section + +#undef TARGET_ASM_TM_CLONE_TABLE_SECTION +#define TARGET_ASM_TM_CLONE_TABLE_SECTION darwin_tm_clone_table_section + +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK machopic_reloc_rw_mask + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " +#define TARGET_ASM_GLOBALIZE_LABEL darwin_globalize_label + +/* Emit an assembler directive to set visibility for a symbol. Used + to support visibility attribute and Darwin's private extern + feature. */ +#undef TARGET_ASM_ASSEMBLE_VISIBILITY +#define TARGET_ASM_ASSEMBLE_VISIBILITY darwin_assemble_visibility + +/* Extra attributes for Darwin. */ +#define SUBTARGET_ATTRIBUTE_TABLE \ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, \ + affects_type_identity } */ \ + { "apple_kext_compatibility", 0, 0, false, true, false, \ + darwin_handle_kext_attribute, false }, \ + { "weak_import", 0, 0, true, false, false, \ + darwin_handle_weak_import_attribute, false } + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)) + +#undef TARGET_ASM_MARK_DECL_PRESERVED +#define TARGET_ASM_MARK_DECL_PRESERVED darwin_mark_decl_preserved + +/* Set on a symbol with SYMBOL_FLAG_FUNCTION or + MACHO_SYMBOL_FLAG_VARIABLE to indicate that the function or + variable has been defined in this translation unit. + When porting Mach-O to new architectures you need to make + sure these aren't clobbered by the backend. */ + +#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_MACH_DEP) +#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_MACH_DEP) << 1) + +/* Set on a symbol to indicate when fix-and-continue style code + generation is being used and the symbol refers to a static symbol + that should be rebound from new instances of a translation unit to + the original instance of the data. 
*/ + +#define MACHO_SYMBOL_STATIC ((SYMBOL_FLAG_MACH_DEP) << 2) + +/* Symbolic names for various things we might know about a symbol. */ + +enum machopic_addr_class { + MACHOPIC_UNDEFINED, + MACHOPIC_DEFINED_DATA, + MACHOPIC_UNDEFINED_DATA, + MACHOPIC_DEFINED_FUNCTION, + MACHOPIC_UNDEFINED_FUNCTION +}; + +/* Macros defining the various PIC cases. */ + +#undef MACHO_DYNAMIC_NO_PIC_P +#define MACHO_DYNAMIC_NO_PIC_P (TARGET_MACHO_DYNAMIC_NO_PIC) +#undef MACHOPIC_INDIRECT +#define MACHOPIC_INDIRECT (flag_pic || MACHO_DYNAMIC_NO_PIC_P) +#define MACHOPIC_JUST_INDIRECT (MACHO_DYNAMIC_NO_PIC_P) +#undef MACHOPIC_PURE +#define MACHOPIC_PURE (flag_pic && ! MACHO_DYNAMIC_NO_PIC_P) + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO darwin_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding + +#define GEN_BINDER_NAME_FOR_STUB(BUF,STUB,STUB_LENGTH) \ + do { \ + const char *const stub_ = (STUB); \ + char *buffer_ = (BUF); \ + strcpy (buffer_, stub_); \ + if (stub_[0] == '"') \ + { \ + strcpy (buffer_ + (STUB_LENGTH) - 1, "_binder\""); \ + } \ + else \ + { \ + strcpy (buffer_ + (STUB_LENGTH), "_binder"); \ + } \ + } while (0) + +#define GEN_SYMBOL_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *const symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (name_needs_quotes (symbol_) && symbol_[0] != '"') \ + { \ + sprintf (buffer_, "\"%s\"", symbol_); \ + } \ + else \ + { \ + strcpy (buffer_, symbol_); \ + } \ + } while (0) + +/* Given a symbol name string, create the lazy pointer version + of the symbol name. */ + +#define GEN_LAZY_PTR_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (symbol_[0] == '"') \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_ + 1); \ + strcpy (buffer_ + (SYMBOL_LENGTH), "$lazy_ptr\""); \ + } \ + else if (name_needs_quotes (symbol_)) \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 2, "$lazy_ptr\""); \ + } \ + else \ + { \ + strcpy (buffer_, "L"); \ + strcpy (buffer_ + 1, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 1, "$lazy_ptr"); \ + } \ + } while (0) + +#define EH_FRAME_SECTION_NAME "__TEXT" +#define EH_FRAME_SECTION_ATTR ",coalesced,no_toc+strip_static_syms+live_support" + +/* Java runtime class list. */ +#define JCR_SECTION_NAME "__DATA,jcr,regular,no_dead_strip" + +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((CODE) == 2 && (GLOBAL) == 1) \ + ? (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4) : \ + ((CODE) == 1 || (GLOBAL) == 0) ? DW_EH_PE_pcrel : DW_EH_PE_absptr) + +#define ASM_OUTPUT_DWARF_DELTA(FILE,SIZE,LABEL1,LABEL2) \ + darwin_asm_output_dwarf_delta (FILE, SIZE, LABEL1, LABEL2) + +#define ASM_OUTPUT_DWARF_OFFSET(FILE,SIZE,LABEL,BASE) \ + darwin_asm_output_dwarf_offset (FILE, SIZE, LABEL, BASE) + +#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(ASM_OUT_FILE, ENCODING, SIZE, ADDR, DONE) \ + if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) { \ + darwin_non_lazy_pcrel (ASM_OUT_FILE, ADDR); \ + goto DONE; \ + } + +/* Experimentally, putting jump tables in text is faster on SPEC. + Also this is needed for correctness for coalesced functions. 
*/ + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 1 +#endif + +#define TARGET_TERMINATE_DW2_EH_FRAME_INFO false + +#define TARGET_ASM_INIT_SECTIONS darwin_init_sections +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION darwin_asm_named_section + +#define DARWIN_REGISTER_TARGET_PRAGMAS() \ + do { \ + if (!flag_preprocess_only) \ + cpp_register_pragma (parse_in, NULL, "mark", \ + darwin_pragma_ignore, false); \ + c_register_pragma (0, "options", darwin_pragma_options); \ + c_register_pragma (0, "segment", darwin_pragma_ignore); \ + c_register_pragma (0, "unused", darwin_pragma_unused); \ + c_register_pragma (0, "ms_struct", darwin_pragma_ms_struct); \ + } while (0) + +#undef ASM_APP_ON +#define ASM_APP_ON "" +#undef ASM_APP_OFF +#define ASM_APP_OFF "" + +void darwin_register_frameworks (const char *, const char *, int); +void darwin_register_objc_includes (const char *, const char *, int); +#define TARGET_EXTRA_PRE_INCLUDES darwin_register_objc_includes +#define TARGET_EXTRA_INCLUDES darwin_register_frameworks + +void add_framework_path (char *); +#define TARGET_OPTF add_framework_path + +#define TARGET_POSIX_IO + +#define WINT_TYPE "int" + +/* Every program on darwin links against libSystem which contains the pthread + routines, so there's no need to explicitly call out when doing threaded + work. */ + +#undef GOMP_SELF_SPECS +#define GOMP_SELF_SPECS "" +#undef GTM_SELF_SPECS +#define GTM_SELF_SPECS "" + +/* Darwin disables section anchors by default. + They should be enabled per arch where support exists in that arch. */ +#define TARGET_ASM_OUTPUT_ANCHOR NULL +#define DARWIN_SECTION_ANCHORS 0 + +#define HAVE_ENABLE_EXECUTE_STACK + +/* For Apple KEXTs, we make the constructors return this to match gcc + 2.95. */ +#define TARGET_CXX_CDTOR_RETURNS_THIS (darwin_kextabi_p) +#define TARGET_KEXTABI flag_apple_kext + +/* We have target-specific builtins. */ +#define SUBTARGET_FOLD_BUILTIN darwin_fold_builtin + +#define TARGET_N_FORMAT_TYPES 1 +#define TARGET_FORMAT_TYPES darwin_additional_format_types + +#ifndef USED_FOR_TARGET +extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **); +#define GCC_DRIVER_HOST_INITIALIZATION \ + darwin_driver_init (&decoded_options_count, &decoded_options) +#endif + +/* The Apple assembler and linker do not support constructor priorities. */ +#undef SUPPORTS_INIT_PRIORITY +#define SUPPORTS_INIT_PRIORITY 0 + +/* When building cross-compilers (and native crosses) we shall default to + providing an osx-version-min of this unless overridden by the User. */ +#define DEF_MIN_OSX_VERSION "10.4" + +#endif /* CONFIG_DARWIN_H */ diff --git a/gcc-4.9/gcc/config/darwin.opt b/gcc-4.9/gcc/config/darwin.opt new file mode 100644 index 000000000..cedfb7a5b --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.opt @@ -0,0 +1,393 @@ +; Processor-independent options for Darwin. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. 
+; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Various linker options have a -Z added so that they can get to specs +; processing without interference. Note that an option name with a +; prefix that matches another option name, that also takes an +; argument, being mapped to a -Z linker option, needs to be modified +; so the prefix is different, otherwise a '*' after the shorter option +; will match with the longer one. + +all_load +Driver Alias(Zall_load) + +allowable_client +Driver Separate Alias(Zallowable_client) + +arch +Driver RejectNegative Separate + +arch_errors_fatal +Driver Alias(Zarch_errors_fatal) + +bind_at_load +Driver Alias(Zbind_at_load) + +bundle +Driver Alias(Zbundle) + +bundle_loader +Driver Separate Alias(Zbundle_loader) + +dead_strip +Driver Alias(Zdead_strip) + +dependency-file +C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs) + +dylib_file +Driver Separate Alias(Zdylib_file) + +dylinker +Driver + +dynamic +Driver Alias(Zdynamic) + +dynamiclib +Driver Alias(Zdynamiclib) + +exported_symbols_list +Driver Separate Alias(Zexported_symbols_list) + +filelist +Driver RejectNegative Separate + +findirect-virtual-calls +Driver RejectNegative + +flat_namespace +Driver RejectNegative Alias(Zflat_namespace) + +force_cpusubtype_ALL +Driver RejectNegative Alias(Zforce_cpusubtype_ALL) + +force_flat_namespace +Driver RejectNegative Alias(Zforce_flat_namespace) + +framework +Driver RejectNegative Separate + +fterminated-vtables +Driver RejectNegative + +gfull +Driver + +gused +Driver + +headerpad_max_install_names +Driver + +image_base +Driver Separate Alias(Zimage_base) + +init +Driver Separate Alias(Zinit) + +install_name +Driver Separate Alias(Zinstall_name) + +keep_private_externs +Driver + +mconstant-cfstrings +Target Report Var(darwin_constant_cfstrings) Init(1) +Generate compile-time CFString objects + +multi_module +Driver RejectNegative Alias(Zmulti_module) + +multiply_defined +Driver RejectNegative Separate Alias(Zmultiply_defined) + +multiply_defined_unused +Driver RejectNegative Separate Alias(Zmultiplydefinedunused) + +no_dead_strip_inits_and_terms +Driver Alias(Zno_dead_strip_inits_and_terms) + +nofixprebinding +Driver + +nomultidefs +Driver + +noprebind +Driver + +noseglinkedit +Driver + +object +Driver + +prebind +Driver + +prebind_all_twolevel_modules +Driver + +preload +Driver + +private_bundle +Driver + +pthread +Driver + +seg_addr_table +Driver Separate Alias(Zseg_addr_table) + +seg_addr_table_filename +Driver Separate Alias(Zfn_seg_addr_table_filename) + +segaddr +Driver Separate Args(2) Alias(Zsegaddr) + +seglinkedit +Driver + +segs_read_only_addr +Driver Separate Alias(Zsegs_read_only_addr) + +segs_read_write_addr +Driver Separate Alias(Zsegs_read_write_addr) + +single_module +Driver Alias(Zsingle_module) + +twolevel_namespace +Driver + +twolevel_namespace_hints +Driver + +umbrella +Driver Separate Alias(Zumbrella) + +unexported_symbols_list +Driver Separate Alias(Zunexported_symbols_list) + +weak_reference_mismatches +Driver Separate Alias(Zweak_reference_mismatches) + +whatsloaded +Driver + +whyload +Driver + +y +Driver Joined + +Mach +Driver + +Wnonportable-cfstrings +Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning +Warn if constant CFString objects contain non-portable characters + +; Use new-style pic stubs if this is true, x86 only so far. 
+matt-stubs +Target Report Var(darwin_macho_att_stub) Init(1) +Generate AT&T-style stubs for Mach-O + +mdynamic-no-pic +Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) +Generate code suitable for executables (NOT shared libs) + +mfix-and-continue +Target Report Var(darwin_fix_and_continue) +Generate code suitable for fast turn around debugging + +; The Init here is for the convenience of GCC developers, so that cc1 +; and cc1plus don't crash if no -mmacosx-version-min is passed. The +; driver will always pass a -mmacosx-version-min, so in normal use the +; Init is never used. Useful for setting the OS on which people +; usually debug. +mmacosx-version-min= +Target Joined Report Var(darwin_macosx_version_min) Init("10.6") +The earliest MacOS X version on which this program will run + +mone-byte-bool +Target RejectNegative Report Var(darwin_one_byte_bool) +Set sizeof(bool) to 1 + +fapple-kext +Target Report C++ Var(flag_apple_kext) +Generate code for darwin loadable kernel extensions + +mkernel +Target Report Var(flag_mkernel) +Generate code for the kernel or loadable kernel extensions + +iframework +Target RejectNegative C ObjC C++ ObjC++ Joined Separate +-iframework Add to the end of the system framework include path + +X +Driver + +Zall_load +Driver + +Zallowable_client +Driver Separate + +Zarch_errors_fatal +Driver + +Zbind_at_load +Driver + +Zbundle +Driver + +Zbundle_loader +Driver Separate + +Zdead_strip +Driver + +Zdylib_file +Driver Separate + +Zdynamic +Driver + +Zdynamiclib +Driver + +Zexported_symbols_list +Driver Separate + +Zfn_seg_addr_table_filename +Driver Separate + +Zflat_namespace +Driver + +Zforce_cpusubtype_ALL +Driver + +Zforce_flat_namespace +Driver + +Zimage_base +Driver Separate + +Zinit +Driver Separate + +Zinstall_name +Driver Separate + +Zmulti_module +Driver + +Zmultiply_defined +Driver Separate + +Zmultiplydefinedunused +Driver Separate + +Zno_dead_strip_inits_and_terms +Driver + +Zseg_addr_table +Driver Separate + +Zsegaddr +Driver Separate Args(2) + +Zsegs_read_only_addr +Driver Separate + +Zsegs_read_write_addr +Driver Separate + +Zsingle_module +Driver + +Zumbrella +Driver Separate + +Zunexported_symbols_list +Driver Separate + +Zweak_reference_mismatches +Driver Separate + +client_name +Driver Separate + +compatibility_version +Driver Separate + +current_version +Driver Separate + +dylinker_install_name +Driver Separate + +pagezero_size +Driver Separate + +read_only_relocs +Driver Separate + +sectalign +Driver Separate Args(3) + +sectcreate +Driver Separate Args(3) + +sectobjectsymbols +Driver Separate Args(2) + +sectorder +Driver Separate Args(3) + +seg1addr +Driver Separate + +segcreate +Driver Separate Args(3) + +segprot +Driver Separate Args(3) + +segs_read_only_addr +Driver Separate + +segs_read_write_addr +Driver Separate + +sub_library +Driver Separate + +sub_umbrella +Driver Separate + +undefined +Driver Separate diff --git a/gcc-4.9/gcc/config/darwin10.h b/gcc-4.9/gcc/config/darwin10.h new file mode 100644 index 000000000..9eb60dbfb --- /dev/null +++ b/gcc-4.9/gcc/config/darwin10.h @@ -0,0 +1,34 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Jack Howarth . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Fix PR41260 by passing -no_compact_unwind on darwin10 and later until + unwinder in libSystem is fixed to digest new epilog unwinding notes. + + Fix PR47558 by linking against libSystem ahead of libgcc_ext. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ +"%:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) \ + %{!static:%{!static-libgcc: \ + %:version-compare(>= 10.6 mmacosx-version-min= -lSystem) } } \ + %{fno-pic|fno-PIC|fno-pie|fno-PIE|fapple-kext|mkernel|static|mdynamic-no-pic: \ + %:version-compare(>= 10.7 mmacosx-version-min= -no_pie) } %G %L" + +#undef DEF_MIN_OSX_VERSION +#define DEF_MIN_OSX_VERSION "10.6" diff --git a/gcc-4.9/gcc/config/darwin9.h b/gcc-4.9/gcc/config/darwin9.h new file mode 100644 index 000000000..675001cee --- /dev/null +++ b/gcc-4.9/gcc/config/darwin9.h @@ -0,0 +1,65 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Apple Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prefer DWARF2. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#define DARWIN_PREFER_DWARF + +/* Since DWARF2 is default, conditions for running dsymutil are different. */ +#undef DSYMUTIL_SPEC +#define DSYMUTIL_SPEC \ + "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %{v} \ + %{g*:%{!gstabs*:%{!g0: -idsym}}}\ + %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.f|.f90|.f95|.f03|.f77|.for|.F|.F90|.F95|.F03: \ + %{g*:%{!gstabs*:%{!g0: -dsym}}}}}}}}}}}" + +/* Tell collect2 to run dsymutil for us as necessary. */ +#define COLLECT_RUN_DSYMUTIL 1 + +#undef PIE_SPEC +#define PIE_SPEC \ + "%{fpie|pie|fPIE: \ + %{mdynamic-no-pic: %n'-mdynamic-no-pic' overrides '-pie', '-fpie' or '-fPIE'; \ + :-pie}}" + +/* Only ask as for debug data if the debug style is stabs (since as doesn't + yet generate dwarf.) 
*/ + +#undef ASM_DEBUG_SPEC +#define ASM_DEBUG_SPEC "%{g*:%{!g0:%{gstabs:--gstabs}}}" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do { \ + unsigned HOST_WIDE_INT _new_size = (SIZE); \ + fprintf ((FILE), "\t.comm "); \ + assemble_name ((FILE), (NAME)); \ + if (_new_size == 0) _new_size = 1; \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + _new_size, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + } while (0) + +#undef DEF_MIN_OSX_VERSION +#define DEF_MIN_OSX_VERSION "10.5" + +#undef STACK_CHECK_STATIC_BUILTIN +#define STACK_CHECK_STATIC_BUILTIN 1 diff --git a/gcc-4.9/gcc/config/dbx.h b/gcc-4.9/gcc/config/dbx.h new file mode 100644 index 000000000..1b68bcd9a --- /dev/null +++ b/gcc-4.9/gcc/config/dbx.h @@ -0,0 +1,27 @@ +/* Prefer DBX (stabs) debugging information. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file causes gcc to prefer using DBX (stabs) debugging + information. The configure script will add a #include of this file + to tm.h when --with-stabs is used for certain targets. */ + +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG diff --git a/gcc-4.9/gcc/config/dbxcoff.h b/gcc-4.9/gcc/config/dbxcoff.h new file mode 100644 index 000000000..c6a62c475 --- /dev/null +++ b/gcc-4.9/gcc/config/dbxcoff.h @@ -0,0 +1,62 @@ +/* Definitions needed when using stabs embedded in COFF sections. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file may be included by any COFF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Generate SDB debugging information by default. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG +#endif + +/* Be function-relative for block and source line stab directives. */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. 
*/ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* When generating stabs debugging, use N_BINCL entries. */ + +#undef DBX_USE_BINCL +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif diff --git a/gcc-4.9/gcc/config/dbxelf.h b/gcc-4.9/gcc/config/dbxelf.h new file mode 100644 index 000000000..4819cfa79 --- /dev/null +++ b/gcc-4.9/gcc/config/dbxelf.h @@ -0,0 +1,68 @@ +/* Definitions needed when using stabs embedded in ELF sections. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This file may be included by any ELF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +#ifndef GCC_DBX_ELF_H +#define GCC_DBX_ELF_H + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Make LBRAC and RBRAC addresses relative to the start of the + function. The native Solaris stabs debugging format works this + way, gdb expects it, and it reduces the number of relocation + entries... */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* ... but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +#endif /* ! GCC_DBX_ELF_H */ diff --git a/gcc-4.9/gcc/config/default-c.c b/gcc-4.9/gcc/config/default-c.c new file mode 100644 index 000000000..26c41f409 --- /dev/null +++ b/gcc-4.9/gcc/config/default-c.c @@ -0,0 +1,33 @@ +/* Default C-family target hooks initializer. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "c-family/c-target.h" +#include "c-family/c-target-def.h" + +/* Do not include tm.h or tm_p.h here; if it is useful for a target to + define some macros for the initializer in a header without defining + targetcm itself (for example, because of interactions with some + hooks depending on the target OS and others on the target + architecture), create a separate tm_c.h for only the relevant + definitions. */ + +struct gcc_targetcm targetcm = TARGETCM_INITIALIZER; diff --git a/gcc-4.9/gcc/config/elfos.h b/gcc-4.9/gcc/config/elfos.h new file mode 100644 index 000000000..1fce7011b --- /dev/null +++ b/gcc-4.9/gcc/config/elfos.h @@ -0,0 +1,438 @@ +/* elfos.h -- operating system specific defines to be used when + targeting GCC for some generic ELF system + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Based on svr4.h contributed by Ron Guilmette (rfg@netcom.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define TARGET_OBJFMT_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ELF__"); \ + } \ + while (0) + +/* Define a symbol indicating that we are using elfos.h. + Some CPU specific configuration files use this. */ +#define USING_ELFOS_H + +/* The prefix to add to user-visible assembler symbols. + + For ELF systems the convention is *not* to prepend a leading + underscore onto user-level symbol names. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* The biggest alignment supported by ELF in bits. 32-bit ELF + supports section alignment up to (0x80000000 * 8), while + 64-bit ELF supports (0x8000000000000000 * 8). If this macro + is not defined, the default is the largest alignment supported + by 32-bit ELF and representable on a 32-bit host. Use this + macro to limit the alignment which can be specified using + the `__attribute__ ((aligned (N)))' construct. */ +#ifndef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8) +#endif + +/* Use periods rather than dollar signs in special g++ assembler names. */ + +#define NO_DOLLAR_IN_LABEL + +/* Writing `int' for a bit-field forces int alignment for the structure. */ + +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 +#endif + +/* All ELF targets can support DWARF-2. 
*/ + +#define DWARF2_DEBUGGING_INFO 1 + +/* The GNU tools operate better with dwarf2, and it is required by some + psABI's. Since we don't have any native tools to be compatible with, + default to dwarf2. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#endif + +/* All SVR4 targets use the ELF object file format. */ +#define OBJECT_FORMAT_ELF + + +/* Output #ident as a .ident. */ + +#undef TARGET_ASM_OUTPUT_IDENT +#define TARGET_ASM_OUTPUT_IDENT default_asm_output_ident_directive + +#undef SET_ASM_OP +#define SET_ASM_OP "\t.set\t" + +/* Most svr4 assemblers want a .file directive at the beginning of + their input file. */ +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* This is how to allocate empty space in some section. The .zero + pseudo-op is used for this on most svr4 assemblers. */ + +#define SKIP_ASM_OP "\t.zero\t" + +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(FILE, SIZE) \ + fprintf ((FILE), "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\ + SKIP_ASM_OP, (SIZE)) + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. + + For most svr4 systems, the convention is that any symbol which begins + with a period is not put into the linker symbol table by the assembler. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + do \ + { \ + char *__p; \ + (LABEL)[0] = '*'; \ + (LABEL)[1] = '.'; \ + __p = stpcpy (&(LABEL)[2], PREFIX); \ + sprint_ul (__p, (unsigned long) (NUM)); \ + } \ + while (0) + +/* Output the label which precedes a jumptable. Note that for all svr4 + systems where we actually generate jumptables (which is to say every + svr4 target except i386, where we use casesi instead) we put the jump- + tables into the .rodata section and since other stuff could have been + put into the .rodata section prior to any given jumptable, we have to + make sure that the location counter for the .rodata section gets pro- + perly re-aligned prior to the actual beginning of the jump table. */ + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP "\t.align\t" + +#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), 2); +#endif + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ + do \ + { \ + ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \ + (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \ + } \ + while (0) + +/* The standard SVR4 assembler seems to require that certain builtin + library routines (e.g. .udiv) be explicitly declared as .globl + in each assembly file where they are referenced. */ + +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ + (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. 
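(Illustrative sketch, not part of the patch.)  The effect of COMMON_ASM_OP and ASM_OUTPUT_ALIGNED_COMMON below is easiest to see on a tentative definition compiled with -fcommon (the default for this GCC):

    /* bss.c */
    int shared_counter;   /* uninitialized, external linkage -> common storage */

which shows up in the assembler output as a single line of roughly the form

    .comm   shared_counter,4,4

where the second operand is the size in bytes and the third the alignment in bytes (both target dependent), matching the SIZE and ALIGN / BITS_PER_UNIT arguments in the macro.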
*/ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + (SIZE), (ALIGN) / BITS_PER_UNIT); \ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#define LOCAL_ASM_OP "\t.local\t" + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + +/* This is the pseudo-op used to generate a contiguous sequence of byte + values from a double-quoted string WITHOUT HAVING A TERMINATING NUL + AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */ + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "\t.ascii\t" + +/* Support a read-only data section. */ +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" + +/* On svr4, we *do* have support for the .init and .fini sections, and we + can put stuff in there to be executed before and after `main'. We let + crtstuff.c and other files know this by defining the following symbols. + The definitions say how to change sections to the .init and .fini + sections. This is the same for all known svr4 assemblers. */ + +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Output assembly directive to move to the beginning of current section. */ +#ifdef HAVE_GAS_SUBSECTION_ORDERING +# define ASM_SECTION_START_OP "\t.subsection\t-1" +# define ASM_OUTPUT_SECTION_START(FILE) \ + fprintf ((FILE), "%s\n", ASM_SECTION_START_OP) +#endif + +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION default_elf_select_rtx_section +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION default_elf_select_section +#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS +#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS true + +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. */ + +#define TYPE_ASM_OP "\t.type\t" +#define SIZE_ASM_OP "\t.size\t" + +/* This is how we tell the assembler that a symbol is weak. */ + +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do \ + { \ + fputs ("\t.weak\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + fputc ('\n', (FILE)); \ + } \ + while (0) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). 
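(Illustrative sketch, not part of the patch.)  With TYPE_ASM_OP, SIZE_ASM_OP and the TYPE_OPERAND_FMT defined just below, a plain function such as

    int add_one (int x) { return x + 1; }

is bracketed in the assembler output by directives of roughly this shape (produced via ASM_DECLARE_FUNCTION_NAME and ASM_DECLARE_FUNCTION_SIZE further down):

    .type   add_one, @function
    add_one:
            ...body...
    .size   add_one, .-add_one

which is what fills in the symbol-type and symbol-size fields of the ELF symbol table entry.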
*/ + +#define TYPE_OPERAND_FMT "@%s" + +/* Write the extra assembler code needed to declare a function's result. + Most svr4 assemblers don't require any special declaration of the + result value, but there are exceptions. */ + +#ifndef ASM_DECLARE_RESULT +#define ASM_DECLARE_RESULT(FILE, RESULT) +#endif + +/* These macros generate the special .type and .size directives which + are used to set the corresponding fields of the linker symbol table + entries in an ELF object file under SVR4. These macros also output + the starting labels for the relevant functions/objects. */ + +/* Write the extra assembler code needed to declare a function properly. + Some svr4 assemblers need to also have something extra said about the + function's return value. We allow for that here. */ + +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \ + } \ + while (0) +#endif + +/* Write the extra assembler code needed to declare an object properly. */ + +#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT +#define USE_GNU_UNIQUE_OBJECT 1 +#else +#define USE_GNU_UNIQUE_OBJECT 0 +#endif + +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + /* For template static data member instantiations or \ + inline fn local statics and their guard variables, use \ + gnu_unique_object so that they will be combined even under \ + RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \ + vtables and other read-only artificial decls. */ \ + if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \ + && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* Output the size directive for a decl in rest_of_decl_compilation + in the case where we did not do so before the initializer. + Once we find the error_mark_node, we know that the value of + size_directive_output was set + by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */ + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END)\ + do \ + { \ + const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ + \ + if (!flag_inhibit_size_directive \ + && DECL_SIZE (DECL) \ + && ! AT_END && TOP_LEVEL \ + && DECL_INITIAL (DECL) == error_mark_node \ + && !size_directive_output) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ + } \ + while (0) + +/* This is how to declare the size of a function. */ +#ifndef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) +#endif + +/* A table of bytes codes used by the ASM_OUTPUT_ASCII and + ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table + corresponds to a particular byte value [0..255]. 
For any + given byte value, if the value in the corresponding table + position is zero, the given character can be output directly. + If the table value is 1, the byte must be output as a \ooo + octal escape. If the tables value is anything else, then the + byte value should be output as a \ followed by the value + in the table. Note that we can use standard UN*X escape + sequences for many control characters, but we don't use + \a to represent BEL because some svr4 assemblers (e.g. on + the i386) don't know about that. Also, we don't use \v + since some versions of gas, such as 2.2 did not accept it. */ + +#define ELF_ASCII_ESCAPES \ +"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + +/* Some svr4 assemblers have a limit on the number of characters which + can appear in the operand of a .string directive. If your assembler + has such a limitation, you should define STRING_LIMIT to reflect that + limit. Note that at least some svr4 assemblers have a limit on the + actual number of bytes in the double-quoted string, and that they + count each character in an escape sequence as one byte. Thus, an + escape sequence like \377 would count as four bytes. + + If your target assembler doesn't support the .string directive, you + should define this to zero. +*/ + +#define ELF_STRING_LIMIT ((unsigned) 256) + +#define STRING_ASM_OP "\t.string\t" + +/* The routine used to output NUL terminated strings. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable, especially for targets like the i386 + (where the only alternative is to output character sequences as + comma separated lists of numbers). */ + +#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \ + default_elf_asm_output_limited_string ((FILE), (STR)) + +/* The routine used to output sequences of byte values. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable. Note that if we find subparts of the + character sequence which end with NUL (and which are shorter than + STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */ + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ + default_elf_asm_output_ascii ((FILE), (STR), (LENGTH)); + +/* Allow the use of the -frecord-gcc-switches switch via the + elf_record_gcc_switches function defined in varasm.c. */ +#undef TARGET_ASM_RECORD_GCC_SWITCHES +#define TARGET_ASM_RECORD_GCC_SWITCHES elf_record_gcc_switches + +/* A C statement (sans semicolon) to output to the stdio stream STREAM + any text necessary for declaring the name of an external symbol + named NAME which is referenced in this compilation but not defined. + It is needed to properly support non-default visibility. 
*/ + +#ifndef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + default_elf_asm_output_external (FILE, DECL, NAME) +#endif + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc-4.9/gcc/config/epiphany/constraints.md b/gcc-4.9/gcc/config/epiphany/constraints.md new file mode 100644 index 000000000..1c463e531 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/constraints.md @@ -0,0 +1,130 @@ +;; Constraint definitions for Adaptiva epiphany +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Integer constraints + +(define_constraint "U16" + "An unsigned 16-bit constant." + (ior (and (match_code "const_int") + (match_test "IMM16 (ival)")) + (and (match_code "symbol_ref,label_ref,const") + (match_test "epiphany_small16 (op)")))) + +(define_constraint "K" + "An unsigned 5-bit constant." + (and (match_code "const_int") + (match_test "IMM5 (ival)"))) + +;; This could also accept symbol_ref, label_ref or const if we introduce +;; a small area and/or attribute that satisfies the 11-bit signed range. +(define_constraint "L" + "A signed 11-bit constant." + (and (match_code "const_int") + (match_test "SIMM11 (ival)"))) + +(define_constraint "CnL" + "A negated signed 11-bit constant." + (and (match_code "const_int") + (match_test "SIMM11 (-ival)"))) + +(define_constraint "Cm1" + "A signed 11-bit constant added to -1" + (and (match_code "const_int") + (match_test "SIMM11 (ival+1)") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cl1" + "Left-shift of -1" + (and (match_code "const_int") + (match_test "ival == (ival | ~(ival-1))") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cr1" + "Right-shift of -1" + (and (match_code "const_int") + (match_test "ival == (ival & ~(ival+1))") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cal" + "Constant for arithmetic/logical operations" + (match_test "(flag_pic + ? nonsymbolic_immediate_operand (op, VOIDmode) + : immediate_operand (op, VOIDmode))")) + +(define_constraint "Csy" + "Symbolic constant for call/jump instruction" + (match_test "symbolic_operand (op, VOIDmode)")) + +;; Register constraints +;; proper register constraints define a register class and can thus +;; drive register allocation and reload. OTOH sometimes we want to +;; avoid just that. + +;; The register class usable in short insns. +;; Subject to TARGET_PREFER_SHORT_INSN_REGS. +(define_register_constraint "Rcs" "SHORT_INSN_REGS" + "short insn register class.") + +; The registers that can be used to hold a sibcall call address. +; This must not conflict with any callee-saved registers. 
+(define_register_constraint "Rsc" "SIBCALL_REGS" + "sibcall register class") + +; The registers that can be used to hold a status value +(define_register_constraint "Rct" "CORE_CONTROL_REGS" + "Core control register class") + +;; The register group usable in short insns. +(define_constraint "Rgs" + "short insn register group." + (and (match_code "reg") + (match_test "REGNO (op) >= FIRST_PSEUDO_REGISTER || REGNO (op) <= 7"))) + +;; Constant suitable for the addsi3_r pattern. +(define_constraint "Car" + "addsi3_r constant." + (and (match_code "const_int") + (ior (match_test "RTX_OK_FOR_OFFSET_P (SImode, op)") + (match_test "RTX_OK_FOR_OFFSET_P (HImode, op)") + (match_test "RTX_OK_FOR_OFFSET_P (QImode, op)")))) + +;; The return address if it can be replaced with GPR_LR. +(define_constraint "Rra" + "return address constraint - register variant" + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_RETURN_ADDR") + (match_test "!MACHINE_FUNCTION (cfun)->lr_clobbered"))) + +(define_constraint "Rcc" + "integer condition code" + (and (match_code "reg") + (match_test "REGNO (op) == CC_REGNUM"))) + +;; The return address, which might be a stack slot. */ +(define_constraint "Sra" + "return address constraint - memory variant" + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_RETURN_ADDR"))) + +(define_constraint "Cfm" + "control register values to switch fp mode" + (and (match_code "const") + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_FP_MODE"))) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-modes.def b/gcc-4.9/gcc/config/epiphany/epiphany-modes.def new file mode 100644 index 000000000..2a31d6e1c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-modes.def @@ -0,0 +1,40 @@ +/* Definitions of target machine for GNU compiler, Adapteva Epiphany cpu. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +CC_MODE (CC_Z); /* only Z valid - for add, testing result. */ +CC_MODE (CC_N_NE); /* N for not-equal (for lsl). */ +CC_MODE (CC_C_LTU); /* C for unsigned-less-than (for add with carry). */ +CC_MODE (CC_C_GTU); /* C for unsigned-greater-than (for sub with carry). */ +CC_MODE (CC_FP); +CC_MODE (CC_FP_EQ); /* AZ for equal. */ +CC_MODE (CC_FP_ORD); /* AZ || ~AC for ordered. */ +CC_MODE (CC_FP_UNEQ); /* AZ || ~AC for unordered / equal. */ +CC_MODE (CC_FP_GTE); /* ~AC / AZ for greater than / equal. */ +#if 0 /* This would be needed for simplified NaN testing. 
*/ +RESET_FLOAT_FORMAT (SF, motorola_single_format); +RESET_FLOAT_FORMAT (DF, motorola_double_format); +#endif +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ +ADJUST_ALIGNMENT (V8QI, epiphany_vect_align); +ADJUST_ALIGNMENT (V4HI, epiphany_vect_align); +ADJUST_ALIGNMENT (V2SI, epiphany_vect_align); +ADJUST_ALIGNMENT (V2SF, epiphany_vect_align); diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-protos.h b/gcc-4.9/gcc/config/epiphany/epiphany-protos.h new file mode 100644 index 000000000..9121e0c3c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-protos.h @@ -0,0 +1,64 @@ +/* Definitions of target machine for GNU compiler, EPIPHANY cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifdef RTX_CODE +extern enum machine_mode epiphany_select_cc_mode (enum rtx_code, rtx, rtx); + +/* Define the function that build the compare insn for scc and bcc. */ +extern struct rtx_def *gen_compare_reg (enum machine_mode, enum rtx_code, + enum machine_mode, rtx, rtx); +#endif + +/* Declarations for various fns used in the .md file. */ +extern void epiphany_final_prescan_insn (rtx, rtx *, int); +extern bool epiphany_is_long_call_p (rtx); +extern bool epiphany_small16 (rtx); +bool epiphany_uninterruptible_p (tree decl); +bool epiphany_call_uninterruptible_p (rtx mem); +extern rtx sfunc_symbol (const char *name); + +extern void epiphany_expand_prologue (void); +extern void epiphany_expand_epilogue (int); +extern int epiphany_initial_elimination_offset (int, int); +extern void epiphany_init_expanders (void); +extern int hard_regno_mode_ok (int regno, enum machine_mode mode); +#ifdef HARD_CONST +extern void emit_set_fp_mode (int entity, int mode, HARD_REG_SET regs_live); +#endif +extern void epiphany_insert_mode_switch_use (rtx insn, int, int); +extern void epiphany_expand_set_fp_mode (rtx *operands); +extern int epiphany_mode_needed (int entity, rtx insn); +extern int epiphany_mode_entry_exit (int entity, bool); +extern int epiphany_mode_after (int entity, int last_mode, rtx insn); +extern int epiphany_mode_priority_to_mode (int entity, unsigned priority); +extern bool epiphany_epilogue_uses (int regno); +extern bool epiphany_optimize_mode_switching (int entity); +extern bool epiphany_is_interrupt_p (tree); +extern unsigned epiphany_special_round_type_align (tree, unsigned, unsigned); +extern unsigned epiphany_adjust_field_align (tree, unsigned); +extern void epiphany_start_function (FILE *f, const char *name, tree decl); +extern bool epiphany_regno_rename_ok (unsigned src, unsigned dst); + +/* Also declared in insn-attr.h, but files generated from epiphany.md + can't / won't include that. In particular: + PR other/55523: gencondmd file includes / dependencies are messed up, + it uses peephole2 predicates without having all the necessary headers. 
*/ +extern int get_attr_sched_use_fpu (rtx); + diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-sched.md b/gcc-4.9/gcc/config/epiphany/epiphany-sched.md new file mode 100644 index 000000000..d18cba9c6 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-sched.md @@ -0,0 +1,145 @@ +;; DFA scheduling description for EPIPHANY +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Two automata are defined to reduce number of states +;; which a single large automaton will have. (Factoring) + +(define_automaton "inst_pipeline,fpu_pipe") + +;; This unit is basically the decode unit of the processor. +;; Since epiphany is a dual issue machine, it is as if there are two +;; units so that any insn can be processed by either one +;; of the decoding unit. + +(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline") + +;; The fixed point arithmetic unit. + +(define_cpu_unit "int" "inst_pipeline") + +;; The floating point unit. + +(define_cpu_unit "F0" "fpu_pipe") + +;; ---------------------------------------------------- +;; This reservation is to simplify the dual issue description. + +(define_reservation "issue" "pipe_01|pipe_02") + +;; This is to express instructions that cannot be paired. + +(define_reservation "d_lock" "pipe_01+pipe_02") + +;; We don't model all pipeline stages; we model the issue stage +;; inasmuch as we allow only two instructions to issue simultaneously, +;; and flow instructions prevent any simultaneous issue of another instruction. +;; (This uses pipe_01 and pipe_02). +;; Double issue of 'other' insns is prevented by using the int unit in the +;; E1 stage. +;; Double issue of float instructions is prevented by using F0 in the E1 stage. + +(define_insn_reservation "simple_arith" 2 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,cmove,compare,shift,misc,mul") + (eq_attr "length" "4")) + "issue,int") + +; anything but fp / fp_int / v2fp has a bypass +(define_bypass 1 "simple_arith" "simple_arith,simple_arith_2,simple_arith_4,load,store,branch,call,flow") + +(define_insn_reservation "simple_arith_2" 2 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,cmove,compare,shift,misc,mul") + (eq_attr "length" "8")) + "issue,issue+int,int") + +(define_insn_reservation "simple_arith_4" 4 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,compare,shift,misc,mul") + (eq_attr "length" "12,16,20,24")) + "issue,issue+int,issue+int,issue+int,int") + +;; Loads have a latency of two. 
+;; Note that we fix up the latency of post_modify in epiphany.c:epiphany_adjust_cost + +(define_insn_reservation "load" 3 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "load")) + "issue,int") + +; anything but fp / fp_int / v2fp has a bypass +(define_bypass 2 "load" "simple_arith,simple_arith_2,simple_arith_4,load,store,branch,call,flow") + +(define_insn_reservation "store" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "store")) + "issue,int") + +;; Branch +;; Latency when taken: 3 +;; Issue Rate: 1 +;; The latency is 1 when the branch is not taken. +;; We can't really do much with the latency, even if we could express it, +;; but the pairing restrictions are useful to take into account. + +(define_insn_reservation "branch" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "branch,uncond_branch")) + "d_lock") + +;; Calls introduce a longish delay that is likely to flush the pipelines +;; of the caller's instructions. Both the call instruction itself and +;; the rts at the end of the call / sfunc incur a three cycle penalty, +;; thus also isolating the scheduling of caller and callee. + +(define_insn_reservation "call" 8 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "call,sfunc,fp_sfunc")) + "d_lock*8") + +(define_insn_reservation "flow" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "flow")) + "d_lock") + +(define_insn_reservation "fp_arith" 5 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "fp,fp_int")) + "issue,F0") + +(define_bypass 4 "fp_arith" "store") + +; There are two main consumers for v2fp: +; - other v2fp operations - in that case, the latencies can dovetail to +; save one cycle of latency. +; - 64 bit store operations - we need both registers, but OTOH the latency is +; one lower to start with. +; of the bypass saving one cycle then. +(define_insn_reservation "v2fp_arith" 5 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "v2fp")) + "issue,issue+F0,F0") + +; A boolean attribute for use by peephole2 patterns that try to figure out +; if we overcommitted the FPU. +; This is notionally a numeric attribute to avoid dependency problems. +(define_attr "sched_use_fpu" "" + (cond [(eq_attr "type" "fp,fp_int,v2fp") (const_int 1)] + (const_int 0))) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.c b/gcc-4.9/gcc/config/epiphany/epiphany.c new file mode 100644 index 000000000..59b24107a --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.c @@ -0,0 +1,2938 @@ +/* Subroutines used for code generation on the EPIPHANY cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +.
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "toplev.h" +#include "tm_p.h" +#include "target.h" +#include "df.h" +#include "langhooks.h" +#include "insn-codes.h" +#include "ggc.h" +#include "tm-constrs.h" +#include "tree-pass.h" /* for current_pass */ +#include "context.h" +#include "pass_manager.h" + +/* Which cpu we're compiling for. */ +int epiphany_cpu_type; + +/* Name of mangle string to add to symbols to separate code compiled for each + cpu (or NULL). */ +const char *epiphany_mangle_cpu; + +/* Array of valid operand punctuation characters. */ +char epiphany_punct_chars[256]; + +/* The rounding mode that we generally use for floating point. */ +int epiphany_normal_fp_rounding; + +/* The pass instance, for use in epiphany_optimize_mode_switching. */ +static opt_pass *pass_mode_switch_use; + +static void epiphany_init_reg_tables (void); +static int get_epiphany_condition_code (rtx); +static tree epiphany_handle_interrupt_attribute (tree *, tree, tree, int, bool *); +static tree epiphany_handle_forwarder_attribute (tree *, tree, tree, int, + bool *); +static bool epiphany_pass_by_reference (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx frame_insn (rtx); + +/* defines for the initialization of the GCC target structure. */ +#define TARGET_ATTRIBUTE_TABLE epiphany_attribute_table + +#define TARGET_PRINT_OPERAND epiphany_print_operand +#define TARGET_PRINT_OPERAND_ADDRESS epiphany_print_operand_address + +#define TARGET_RTX_COSTS epiphany_rtx_costs +#define TARGET_ADDRESS_COST epiphany_address_cost +#define TARGET_MEMORY_MOVE_COST epiphany_memory_move_cost + +#define TARGET_PROMOTE_FUNCTION_MODE epiphany_promote_function_mode +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#define TARGET_RETURN_IN_MEMORY epiphany_return_in_memory +#define TARGET_PASS_BY_REFERENCE epiphany_pass_by_reference +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true +#define TARGET_FUNCTION_VALUE epiphany_function_value +#define TARGET_LIBCALL_VALUE epiphany_libcall_value +#define TARGET_FUNCTION_VALUE_REGNO_P epiphany_function_value_regno_p + +#define TARGET_SETUP_INCOMING_VARARGS epiphany_setup_incoming_varargs + +/* Using the simplistic varags handling forces us to do partial reg/stack + argument passing for types with larger size (> 4 bytes) than alignemnt. 
*/ +#define TARGET_ARG_PARTIAL_BYTES epiphany_arg_partial_bytes + +#define TARGET_FUNCTION_OK_FOR_SIBCALL epiphany_function_ok_for_sibcall + +#define TARGET_SCHED_ISSUE_RATE epiphany_issue_rate +#define TARGET_SCHED_ADJUST_COST epiphany_adjust_cost + +#define TARGET_LEGITIMATE_ADDRESS_P epiphany_legitimate_address_p + +#define TARGET_SECONDARY_RELOAD epiphany_secondary_reload + +#define TARGET_OPTION_OVERRIDE epiphany_override_options + +#define TARGET_CONDITIONAL_REGISTER_USAGE epiphany_conditional_register_usage + +#define TARGET_FUNCTION_ARG epiphany_function_arg + +#define TARGET_FUNCTION_ARG_ADVANCE epiphany_function_arg_advance + +#define TARGET_FUNCTION_ARG_BOUNDARY epiphany_function_arg_boundary + +#define TARGET_TRAMPOLINE_INIT epiphany_trampoline_init + +/* Nonzero if the constant rtx value is a legitimate general operand. + We can handle any 32- or 64-bit constant. */ +#define TARGET_LEGITIMATE_CONSTANT_P hook_bool_mode_rtx_true + +#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL \ + epiphany_min_divisions_for_recip_mul + +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE epiphany_preferred_simd_mode + +#define TARGET_VECTOR_MODE_SUPPORTED_P epiphany_vector_mode_supported_p + +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + epiphany_vector_alignment_reachable + +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + epiphany_support_vector_misalignment + +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true +#define TARGET_ASM_OUTPUT_MI_THUNK epiphany_output_mi_thunk + +#include "target-def.h" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +bool +epiphany_is_interrupt_p (tree decl) +{ + tree attrs; + + attrs = DECL_ATTRIBUTES (decl); + if (lookup_attribute ("interrupt", attrs)) + return true; + else + return false; +} + +/* Called from epiphany_override_options. + We use this to initialize various things. */ + +static void +epiphany_init (void) +{ + /* N.B. this pass must not run before the first optimize_mode_switching + pass because of the side effect of epiphany_mode_needed on + MACHINE_FUNCTION(cfun)->unknown_mode_uses. But it must run before + pass_resolve_sw_modes. */ + pass_mode_switch_use = make_pass_mode_switch_use (g); + struct register_pass_info insert_use_info + = { pass_mode_switch_use, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw2 + = g->get_passes()->get_pass_mode_switching ()->clone (); + struct register_pass_info mode_sw2_info + = { mode_sw2, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw3 = make_pass_resolve_sw_modes (g); + struct register_pass_info mode_sw3_info + = { mode_sw3, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw4 + = g->get_passes()->get_pass_split_all_insns ()->clone (); + struct register_pass_info mode_sw4_info + = { mode_sw4, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES ARRAY_SIZE (num_modes) + + epiphany_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (epiphany_punct_chars, 0, sizeof (epiphany_punct_chars)); + epiphany_punct_chars['-'] = 1; + + epiphany_normal_fp_rounding + = (epiphany_normal_fp_mode == FP_MODE_ROUND_TRUNC + ?
FP_MODE_ROUND_TRUNC : FP_MODE_ROUND_NEAREST); + register_pass (&mode_sw4_info); + register_pass (&mode_sw2_info); + register_pass (&mode_sw3_info); + register_pass (&insert_use_info); + register_pass (&mode_sw2_info); + /* Verify that NUM_MODES_FOR_MODE_SWITCHING has one value per entity. */ + gcc_assert (N_ENTITIES == EPIPHANY_MSW_ENTITY_NUM); + +#if 1 /* As long as peep2_rescan is not implemented, + (see http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html,) + we need a second peephole2 pass to get reasonable code. */ + { + opt_pass *extra_peephole2 + = g->get_passes ()->get_pass_peephole2 ()->clone (); + struct register_pass_info peep2_2_info + = { extra_peephole2, "peephole2", + 1, PASS_POS_INSERT_AFTER + }; + + register_pass (&peep2_2_info); + } +#endif +} + +/* The condition codes of the EPIPHANY, and the inverse function. */ +static const char *const epiphany_condition_codes[] = +{ /* 0 1 2 3 4 5 6 7 8 9 */ + "eq", "ne", "ltu", "gteu", "gt", "lte", "gte", "lt", "gtu", "lteu", + /* 10 11 12 13 */ + "beq","bne","blt", "blte", +}; + +#define EPIPHANY_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the EPIPHANY condition code string in + `epiphany_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_epiphany_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case CCmode: + switch (GET_CODE (comparison)) + { + case EQ : return 0; + case NE : return 1; + case LTU : return 2; + case GEU : return 3; + case GT : return 4; + case LE : return 5; + case GE : return 6; + case LT : return 7; + case GTU : return 8; + case LEU : return 9; + + default : gcc_unreachable (); + } + case CC_N_NEmode: + switch (GET_CODE (comparison)) + { + case EQ: return 6; + case NE: return 7; + default: gcc_unreachable (); + } + case CC_C_LTUmode: + switch (GET_CODE (comparison)) + { + case GEU: return 2; + case LTU: return 3; + default: gcc_unreachable (); + } + case CC_C_GTUmode: + switch (GET_CODE (comparison)) + { + case LEU: return 3; + case GTU: return 2; + default: gcc_unreachable (); + } + case CC_FPmode: + switch (GET_CODE (comparison)) + { + case EQ: return 10; + case NE: return 11; + case LT: return 12; + case LE: return 13; + default: gcc_unreachable (); + } + case CC_FP_EQmode: + switch (GET_CODE (comparison)) + { + case EQ: return 0; + case NE: return 1; + default: gcc_unreachable (); + } + case CC_FP_GTEmode: + switch (GET_CODE (comparison)) + { + case EQ: return 0; + case NE: return 1; + case GT : return 4; + case GE : return 6; + case UNLE : return 5; + case UNLT : return 7; + default: gcc_unreachable (); + } + case CC_FP_ORDmode: + switch (GET_CODE (comparison)) + { + case ORDERED: return 9; + case UNORDERED: return 8; + default: gcc_unreachable (); + } + case CC_FP_UNEQmode: + switch (GET_CODE (comparison)) + { + case UNEQ: return 9; + case LTGT: return 8; + default: gcc_unreachable (); + } + default: gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + + +/* Return 1 if hard register REGNO can hold a value of machine_mode MODE. */ +int +hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return (regno & 1) == 0 && GPR_P (regno); + else + return 1; +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. 
*/ + +enum machine_mode +epiphany_select_cc_mode (enum rtx_code op, + rtx x ATTRIBUTE_UNUSED, + rtx y ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + if (TARGET_SOFT_CMPSF + || op == ORDERED || op == UNORDERED) + { + if (op == EQ || op == NE) + return CC_FP_EQmode; + if (op == ORDERED || op == UNORDERED) + return CC_FP_ORDmode; + if (op == UNEQ || op == LTGT) + return CC_FP_UNEQmode; + return CC_FP_GTEmode; + } + return CC_FPmode; + } + /* recognize combiner pattern ashlsi_btst: + (parallel [ + (set (reg:N_NE 65 cc1) + (compare:N_NE (zero_extract:SI (reg/v:SI 75 [ a ]) + (const_int 1 [0x1]) + (const_int 0 [0x0])) + (const_int 0 [0x0]))) + (clobber (scratch:SI)) */ + else if ((op == EQ || op == NE) + && GET_CODE (x) == ZERO_EXTRACT + && XEXP (x, 1) == const1_rtx + && CONST_INT_P (XEXP (x, 2))) + return CC_N_NEmode; + else if ((op == GEU || op == LTU) && GET_CODE (x) == PLUS) + return CC_C_LTUmode; + else if ((op == LEU || op == GTU) && GET_CODE (x) == MINUS) + return CC_C_GTUmode; + else + return CCmode; +} + +enum reg_class epiphany_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +static void +epiphany_init_reg_tables (void) +{ + int i; + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i == GPR_LR) + epiphany_regno_reg_class[i] = LR_REGS; + else if (i <= 7 && TARGET_PREFER_SHORT_INSN_REGS) + epiphany_regno_reg_class[i] = SHORT_INSN_REGS; + else if (call_used_regs[i] + && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)) + epiphany_regno_reg_class[i] = SIBCALL_REGS; + else if (i >= CORE_CONTROL_FIRST && i <= CORE_CONTROL_LAST) + epiphany_regno_reg_class[i] = CORE_CONTROL_REGS; + else if (i < (GPR_LAST+1) + || i == ARG_POINTER_REGNUM || i == FRAME_POINTER_REGNUM) + epiphany_regno_reg_class[i] = GENERAL_REGS; + else if (i == CC_REGNUM) + epiphany_regno_reg_class[i] = NO_REGS /* CC_REG: must be NO_REGS */; + else + epiphany_regno_reg_class[i] = NO_REGS; + } +} + +/* EPIPHANY specific attribute support. + + The EPIPHANY has these attributes: + interrupt - for interrupt functions. + short_call - the function is assumed to be reachable with the b / bl + instructions. + long_call - the function address is loaded into a register before use. + disinterrupt - functions which mask interrupts throughout. + They unmask them while calling an interruptible + function, though. */ + +static const struct attribute_spec epiphany_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "interrupt", 0, 9, true, false, false, epiphany_handle_interrupt_attribute, true }, + { "forwarder_section", 1, 1, true, false, false, epiphany_handle_forwarder_attribute, false }, + { "long_call", 0, 0, false, true, true, NULL, false }, + { "short_call", 0, 0, false, true, true, NULL, false }, + { "disinterrupt", 0, 0, false, true, true, NULL, true }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. 
*/ +static tree +epiphany_handle_interrupt_attribute (tree *node ATTRIBUTE_UNUSED, + tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree value; + + if (!args) + return NULL_TREE; + + value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "reset") + && strcmp (TREE_STRING_POINTER (value), "software_exception") + && strcmp (TREE_STRING_POINTER (value), "page_miss") + && strcmp (TREE_STRING_POINTER (value), "timer0") + && strcmp (TREE_STRING_POINTER (value), "timer1") + && strcmp (TREE_STRING_POINTER (value), "message") + && strcmp (TREE_STRING_POINTER (value), "dma0") + && strcmp (TREE_STRING_POINTER (value), "dma1") + && strcmp (TREE_STRING_POINTER (value), "wand") + && strcmp (TREE_STRING_POINTER (value), "swi")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"reset\", \"software_exception\", \"page_miss\", \"timer0\", \"timer1\", \"message\", \"dma0\", \"dma1\", \"wand\" or \"swi\"", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + return epiphany_handle_interrupt_attribute (node, name, TREE_CHAIN (args), + flags, no_add_attrs); +} + +/* Handle a "forwarder_section" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +epiphany_handle_forwarder_attribute (tree *node ATTRIBUTE_UNUSED, + tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree value; + + value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + + +/* Misc. utilities. */ + +/* Generate a SYMBOL_REF for the special function NAME. When the address + can't be placed directly into a call instruction, and if possible, copy + it to a register so that cse / code hoisting is possible. */ +rtx +sfunc_symbol (const char *name) +{ + rtx sym = gen_rtx_SYMBOL_REF (Pmode, name); + + /* These sfuncs should be hidden, and every dso should get a copy. */ + SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION | SYMBOL_FLAG_LOCAL; + if (TARGET_SHORT_CALLS) + ; /* Nothing to be done. */ + else if (can_create_pseudo_p ()) + sym = copy_to_mode_reg (Pmode, sym); + else /* We rely on reload to fix this up. */ + gcc_assert (!reload_in_progress || reload_completed); + return sym; +} + +/* X and Y are two things to compare using CODE in IN_MODE. + Emit the compare insn, construct the the proper cc reg in the proper + mode, and return the rtx for the cc reg comparison in CMODE. */ + +rtx +gen_compare_reg (enum machine_mode cmode, enum rtx_code code, + enum machine_mode in_mode, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg, pat, clob0, clob1, clob2; + + if (in_mode == VOIDmode) + in_mode = GET_MODE (x); + if (in_mode == VOIDmode) + in_mode = GET_MODE (y); + + if (mode == CC_FPmode) + { + /* The epiphany has only EQ / NE / LT / LE conditions for + hardware floating point. 
*/ + if (code == GT || code == GE || code == UNLE || code == UNLT) + { + rtx tmp = x; x = y; y = tmp; + code = swap_condition (code); + } + cc_reg = gen_rtx_REG (mode, CCFP_REGNUM); + y = force_reg (in_mode, y); + } + else + { + if (mode == CC_FP_GTEmode + && (code == LE || code == LT || code == UNGT || code == UNGE)) + { + if (flag_finite_math_only + && ((REG_P (x) && REGNO (x) == GPR_0) + || (REG_P (y) && REGNO (y) == GPR_1))) + switch (code) + { + case LE: code = UNLE; break; + case LT: code = UNLT; break; + case UNGT: code = GT; break; + case UNGE: code = GE; break; + default: gcc_unreachable (); + } + else + { + rtx tmp = x; x = y; y = tmp; + code = swap_condition (code); + } + } + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + } + if ((mode == CC_FP_EQmode || mode == CC_FP_GTEmode + || mode == CC_FP_ORDmode || mode == CC_FP_UNEQmode) + /* movcc might want to re-emit a comparison during ifcvt. */ + && (!REG_P (x) || REGNO (x) != GPR_0 + || !REG_P (y) || REGNO (y) != GPR_1)) + { + rtx reg; + +#if 0 + /* ??? We should really do the r0/r1 clobber only during rtl expansion, + but just like the flag clobber of movsicc, we have to allow + this for ifcvt to work, on the assumption that we'll only want + to do this if these registers have been used before by the + pre-ifcvt code. */ + gcc_assert (currently_expanding_to_rtl); +#endif + reg = gen_rtx_REG (in_mode, GPR_0); + if (reg_overlap_mentioned_p (reg, y)) + return 0; + emit_move_insn (reg, x); + x = reg; + reg = gen_rtx_REG (in_mode, GPR_1); + emit_move_insn (reg, y); + y = reg; + } + else + x = force_reg (in_mode, x); + + pat = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + if (mode == CC_FP_EQmode || mode == CC_FP_GTEmode) + { + const char *name = mode == CC_FP_EQmode ? "__eqsf2" : "__gtesf2"; + rtx use = gen_rtx_USE (VOIDmode, sfunc_symbol (name)); + + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_IP)); + clob1 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_LR)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, pat, use, clob0, clob1)); + } + else if (mode == CC_FP_ORDmode || mode == CC_FP_UNEQmode) + { + const char *name = mode == CC_FP_ORDmode ? "__ordsf2" : "__uneqsf2"; + rtx use = gen_rtx_USE (VOIDmode, sfunc_symbol (name)); + + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_IP)); + clob1 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_16)); + clob2 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_LR)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (5, pat, use, + clob0, clob1, clob2)); + } + else + { + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (in_mode)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob0)); + } + emit_insn (pat); + return gen_rtx_fmt_ee (code, cmode, cc_reg, const0_rtx); +} + +/* The ROUND_ADVANCE* macros are local to this file. */ +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ + (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ + ((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ + (epiphany_function_arg_boundary ((MODE), (TYPE)) > BITS_PER_WORD \ + ? (((CUM) + 1) & ~1) \ + : (CUM)) + +static unsigned int +epiphany_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + if ((type ? 
TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode)) <= PARM_BOUNDARY) + return PARM_BOUNDARY; + return 2 * PARM_BOUNDARY; +} + +/* Do any needed setup for a variadic function. For the EPIPHANY, we + actually emit the code in epiphany_expand_prologue. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. */ + + +static void +epiphany_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode, + tree type, int *pretend_size, int no_rtl) +{ + int first_anon_arg; + CUMULATIVE_ARGS next_cum; + machine_function_t *mf = MACHINE_FUNCTION (cfun); + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + next_cum = *get_cumulative_args (cum); + next_cum + = ROUND_ADVANCE_CUM (next_cum, mode, type) + ROUND_ADVANCE_ARG (mode, type); + first_anon_arg = next_cum; + + if (first_anon_arg < MAX_EPIPHANY_PARM_REGS && !no_rtl) + { + /* Note that first_reg_offset < MAX_EPIPHANY_PARM_REGS. */ + int first_reg_offset = first_anon_arg; + + *pretend_size = ((MAX_EPIPHANY_PARM_REGS - first_reg_offset) + * UNITS_PER_WORD); + } + mf->args_parsed = 1; + mf->pretend_args_odd = ((*pretend_size & UNITS_PER_WORD) ? 1 : 0); +} + +static int +epiphany_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + int words = 0, rounded_cum; + + gcc_assert (!epiphany_pass_by_reference (cum, mode, type, /* named */ true)); + + rounded_cum = ROUND_ADVANCE_CUM (*get_cumulative_args (cum), mode, type); + if (rounded_cum < MAX_EPIPHANY_PARM_REGS) + { + words = MAX_EPIPHANY_PARM_REGS - rounded_cum; + if (words >= ROUND_ADVANCE_ARG (mode, type)) + words = 0; + } + return words * UNITS_PER_WORD; +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +epiphany_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + /* Small integers in the right context are as cheap as registers. */ + case CONST_INT: + if ((outer_code == PLUS || outer_code == MINUS) + && SIMM11 (INTVAL (x))) + { + *total = 0; + return true; + } + if (IMM16 (INTVAL (x))) + { + *total = outer_code == SET ? 0 : COSTS_N_INSNS (1); + return true; + } + /* FALLTHRU */ + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS ((epiphany_small16 (x) ? 0 : 1) + + (outer_code == SET ? 0 : 1)); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!IMM16 (INTVAL (high)) + + !IMM16 (INTVAL (low))); + return true; + } + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +static int +epiphany_address_cost (rtx addr, enum machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, bool speed) +{ + rtx reg; + rtx off = const0_rtx; + int i; + + if (speed) + return 0; + /* Return 0 for addresses valid in short insns, 1 for addresses only valid + in long insns. 
*/ + switch (GET_CODE (addr)) + { + case PLUS : + reg = XEXP (addr, 0); + off = XEXP (addr, 1); + break; + case POST_MODIFY: + reg = XEXP (addr, 0); + off = XEXP (addr, 1); + gcc_assert (GET_CODE (off) == PLUS && rtx_equal_p (reg, XEXP (off, 0))); + off = XEXP (off, 1); + if (satisfies_constraint_Rgs (reg) && satisfies_constraint_Rgs (off)) + return 0; + return 1; + case REG: + default: + reg = addr; + break; + } + if (!satisfies_constraint_Rgs (reg)) + return 1; + /* The offset range available for short instructions depends on the mode + of the memory access. */ + /* First, make sure we have a valid integer. */ + if (!satisfies_constraint_L (off)) + return 1; + i = INTVAL (off); + switch (GET_MODE_SIZE (mode)) + { + default: + case 4: + if (i & 1) + return 1; + i >>= 1; + /* Fall through. */ + case 2: + if (i & 1) + return 1; + i >>= 1; + /* Fall through. */ + case 1: + return i < -7 || i > 7; + } +} + +/* Compute the cost of moving data between registers and memory. + For integer, load latency is twice as long as register-register moves, + but issue pich is the same. For floating point, load latency is three + times as much as a reg-reg move. */ +static int +epiphany_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return GET_MODE_CLASS (mode) == MODE_INT ? 3 : 4; +} + +/* Function prologue/epilogue handlers. */ + +/* EPIPHANY stack frames look like: + + Before call After call + +-----------------------+ +-----------------------+ + | | | | + high | local variables, | | local variables, | + mem | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | arguments on stack. | | arguments on stack. | + | | | | + SP+8->+-----------------------+FP+8m->+-----------------------+ + | 2 word save area for | | reg parm save area, | + | leaf funcs / flags | | only created for | + SP+0->+-----------------------+ | variable argument | + | functions | + FP+8n->+-----------------------+ + | | + | register save area | + | | + +-----------------------+ + | | + | local variables | + | | + FP+0->+-----------------------+ + | | + | alloca allocations | + | | + +-----------------------+ + | | + | arguments on stack | + | | + SP+8->+-----------------------+ + low | 2 word save area for | + memory | leaf funcs / flags | + SP+0->+-----------------------+ + +Notes: +1) The "reg parm save area" does not exist for non variable argument fns. + The "reg parm save area" could be eliminated if we created our + own TARGET_GIMPLIFY_VA_ARG_EXPR, but that has tradeoffs as well + (so it's not done). */ + +/* Structure to be filled in by epiphany_compute_frame_size with register + save masks, and offsets for the current function. */ +struct epiphany_frame_info +{ + unsigned int total_size; /* # bytes that the entire frame takes up. */ + unsigned int pretend_size; /* # bytes we push and pretend caller did. */ + unsigned int args_size; /* # bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # bytes needed to store regs. */ + unsigned int var_size; /* # bytes that variables take up. */ + HARD_REG_SET gmask; /* Set of saved gp registers. */ + int initialized; /* Nonzero if frame size already calculated. */ + int stld_sz; /* Current load/store data size for offset + adjustment. 
*/ + int need_fp; /* value to override "frame_pointer_needed */ + /* FIRST_SLOT is the slot that is saved first, at the very start of + the frame, with a POST_MODIFY to allocate the frame, if the size fits, + or at least the parm and register save areas, otherwise. + In the case of a large frame, LAST_SLOT is the slot that is saved last, + with a POST_MODIFY to allocate the rest of the frame. */ + int first_slot, last_slot, first_slot_offset, last_slot_offset; + int first_slot_size; + int small_threshold; +}; + +/* Current frame information calculated by epiphany_compute_frame_size. */ +static struct epiphany_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct epiphany_frame_info zero_frame_info; + +/* The usual; we set up our machine_function data. */ +static struct machine_function * +epiphany_init_machine_status (void) +{ + struct machine_function *machine; + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + + machine = ggc_alloc_cleared_machine_function_t (); + + return machine; +} + +/* Implements INIT_EXPANDERS. We just set up to call the above + * function. */ +void +epiphany_init_expanders (void) +{ + init_machine_status = epiphany_init_machine_status; +} + +/* Type of function DECL. + + The result is cached. To reset the cache at the end of a function, + call with DECL = NULL_TREE. */ + +static enum epiphany_function_type +epiphany_compute_function_type (tree decl) +{ + tree a; + /* Cached value. */ + static enum epiphany_function_type fn_type = EPIPHANY_FUNCTION_UNKNOWN; + /* Last function we were called for. */ + static tree last_fn = NULL_TREE; + + /* Resetting the cached value? */ + if (decl == NULL_TREE) + { + fn_type = EPIPHANY_FUNCTION_UNKNOWN; + last_fn = NULL_TREE; + return fn_type; + } + + if (decl == last_fn && fn_type != EPIPHANY_FUNCTION_UNKNOWN) + return fn_type; + + /* Assume we have a normal function (not an interrupt handler). */ + fn_type = EPIPHANY_FUNCTION_NORMAL; + + /* Now see if this is an interrupt handler. */ + for (a = DECL_ATTRIBUTES (decl); + a; + a = TREE_CHAIN (a)) + { + tree name = TREE_PURPOSE (a); + + if (name == get_identifier ("interrupt")) + fn_type = EPIPHANY_FUNCTION_INTERRUPT; + } + + last_fn = decl; + return fn_type; +} + +#define RETURN_ADDR_REGNUM GPR_LR +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno, interrupt_p) \ + ((df_regs_ever_live_p (regno) \ + || (interrupt_p && !crtl->is_leaf \ + && call_used_regs[regno] && !fixed_regs[regno])) \ + && (!call_used_regs[regno] || regno == GPR_LR \ + || (interrupt_p && regno != GPR_SP))) + +#define MUST_SAVE_RETURN_ADDR 0 + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +static unsigned int +epiphany_compute_frame_size (int size /* # of var. bytes allocated. 
*/) +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, reg_size; + HARD_REG_SET gmask; + enum epiphany_function_type fn_type; + int interrupt_p; + int first_slot, last_slot, first_slot_offset, last_slot_offset; + int first_slot_size; + int small_slots = 0; + + var_size = size; + args_size = crtl->outgoing_args_size; + pretend_size = crtl->args.pretend_args_size; + total_size = args_size + var_size; + reg_size = 0; + CLEAR_HARD_REG_SET (gmask); + first_slot = -1; + first_slot_offset = 0; + last_slot = -1; + last_slot_offset = 0; + first_slot_size = UNITS_PER_WORD; + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + /* Calculate space needed for registers. */ + + for (regno = MAX_EPIPHANY_PARM_REGS - 1; pretend_size > reg_size; regno--) + { + reg_size += UNITS_PER_WORD; + SET_HARD_REG_BIT (gmask, regno); + if (epiphany_stack_offset - reg_size == 0) + first_slot = regno; + } + + if (interrupt_p) + reg_size += 2 * UNITS_PER_WORD; + else + small_slots = epiphany_stack_offset / UNITS_PER_WORD; + + if (frame_pointer_needed) + { + current_frame_info.need_fp = 1; + if (!interrupt_p && first_slot < 0) + first_slot = GPR_FP; + } + else + current_frame_info.need_fp = 0; + for (regno = 0; regno <= GPR_LAST; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + gcc_assert (!TEST_HARD_REG_BIT (gmask, regno)); + reg_size += UNITS_PER_WORD; + SET_HARD_REG_BIT (gmask, regno); + /* FIXME: when optimizing for speed, take schedling into account + when selecting these registers. */ + if (regno == first_slot) + gcc_assert (regno == GPR_FP && frame_pointer_needed); + else if (!interrupt_p && first_slot < 0) + first_slot = regno; + else if (last_slot < 0 + && (first_slot ^ regno) != 1 + && (!interrupt_p || regno > GPR_1)) + last_slot = regno; + } + } + if (TEST_HARD_REG_BIT (gmask, GPR_LR)) + MACHINE_FUNCTION (cfun)->lr_clobbered = 1; + /* ??? Could sometimes do better than that. */ + current_frame_info.small_threshold + = (optimize >= 3 || interrupt_p ? 0 + : pretend_size ? small_slots + : 4 + small_slots - (first_slot == GPR_FP)); + + /* If there might be variables with 64-bit alignment requirement, align the + start of the variables. */ + if (var_size >= 2 * UNITS_PER_WORD + /* We don't want to split a double reg save/restore across two unpaired + stack slots when optimizing. This rounding could be avoided with + more complex reordering of the register saves, but that would seem + to be a lot of code complexity for little gain. */ + || (reg_size > 8 && optimize)) + reg_size = EPIPHANY_STACK_ALIGN (reg_size); + if (((total_size + reg_size + /* Reserve space for UNKNOWN_REGNUM. 
*/ + + EPIPHANY_STACK_ALIGN (4)) + <= (unsigned) epiphany_stack_offset) + && !interrupt_p + && crtl->is_leaf && !frame_pointer_needed) + { + first_slot = -1; + last_slot = -1; + goto alloc_done; + } + else if (reg_size + && !interrupt_p + && reg_size < (unsigned HOST_WIDE_INT) epiphany_stack_offset) + reg_size = epiphany_stack_offset; + if (interrupt_p) + { + if (total_size + reg_size < 0x3fc) + { + first_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + first_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + last_slot = -1; + } + else + { + first_slot_offset = EPIPHANY_STACK_ALIGN (reg_size); + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size); + last_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + if (last_slot >= 0) + CLEAR_HARD_REG_BIT (gmask, last_slot); + } + } + else if (total_size + reg_size < 0x1ffc && first_slot >= 0) + { + first_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + last_slot = -1; + } + else + { + if (total_size + reg_size <= (unsigned) epiphany_stack_offset) + { + gcc_assert (first_slot < 0); + gcc_assert (reg_size == 0 || (int) reg_size == epiphany_stack_offset); + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + } + else + { + first_slot_offset + = (reg_size + ? EPIPHANY_STACK_ALIGN (reg_size - epiphany_stack_offset) : 0); + if (!first_slot_offset) + { + if (first_slot != GPR_FP || !current_frame_info.need_fp) + last_slot = first_slot; + first_slot = -1; + } + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size); + if (reg_size) + last_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + } + if (last_slot >= 0) + CLEAR_HARD_REG_BIT (gmask, last_slot); + } + alloc_done: + if (first_slot >= 0) + { + CLEAR_HARD_REG_BIT (gmask, first_slot); + if (TEST_HARD_REG_BIT (gmask, first_slot ^ 1) + && epiphany_stack_offset - pretend_size >= 2 * UNITS_PER_WORD) + { + CLEAR_HARD_REG_BIT (gmask, first_slot ^ 1); + first_slot_size = 2 * UNITS_PER_WORD; + first_slot &= ~1; + } + } + total_size = first_slot_offset + last_slot_offset; + + /* Save computed information. */ + current_frame_info.total_size = total_size; + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + COPY_HARD_REG_SET (current_frame_info.gmask, gmask); + current_frame_info.first_slot = first_slot; + current_frame_info.last_slot = last_slot; + current_frame_info.first_slot_offset = first_slot_offset; + current_frame_info.first_slot_size = first_slot_size; + current_frame_info.last_slot_offset = last_slot_offset; + + current_frame_info.initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +static void +epiphany_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case 'd': + fputs (epiphany_condition_codes[get_epiphany_condition_code (x)], file); + return; + case 'D': + fputs (epiphany_condition_codes[EPIPHANY_INVERSE_CONDITION_CODE + (get_epiphany_condition_code (x))], + file); + return; + + case 'X': + current_frame_info.stld_sz = 8; + break; + + case 'C' : + current_frame_info.stld_sz = 4; + break; + + case 'c' : + current_frame_info.stld_sz = 2; + break; + + case 'f': + fputs (REG_P (x) ? 
"jalr " : "bl ", file); + break; + + case '-': + fprintf (file, "r%d", epiphany_m1reg); + return; + + case 0 : + /* Do nothing special. */ + break; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + rtx addr; + rtx offset; + + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + if (code == 0) + current_frame_info.stld_sz = 1; + fputc ('[', file); + addr = XEXP (x, 0); + switch (GET_CODE (addr)) + { + case POST_INC: + offset = GEN_INT (GET_MODE_SIZE (GET_MODE (x))); + addr = XEXP (addr, 0); + break; + case POST_DEC: + offset = GEN_INT (-GET_MODE_SIZE (GET_MODE (x))); + addr = XEXP (addr, 0); + break; + case POST_MODIFY: + offset = XEXP (XEXP (addr, 1), 1); + addr = XEXP (addr, 0); + break; + default: + offset = 0; + break; + } + output_address (addr); + fputc (']', file); + if (offset) + { + fputc (',', file); + if (CONST_INT_P (offset)) switch (GET_MODE_SIZE (GET_MODE (x))) + { + default: + gcc_unreachable (); + case 8: + offset = GEN_INT (INTVAL (offset) >> 3); + break; + case 4: + offset = GEN_INT (INTVAL (offset) >> 2); + break; + case 2: + offset = GEN_INT (INTVAL (offset) >> 1); + break; + case 1: + break; + } + output_address (offset); + } + break; + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "%s0x%08lx", IMMEDIATE_PREFIX, l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + case CONST_INT: + fprintf(file,"%s",IMMEDIATE_PREFIX); + if (code == 'C' || code == 'X') + { + fprintf (file, "%ld", + (long) (INTVAL (x) / current_frame_info.stld_sz)); + break; + } + /* Fall through */ + default : + output_addr_const (file, x); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. */ + +static void +epiphany_print_operand_address (FILE *file, rtx addr) +{ + register rtx base, index = 0; + int offset = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + if (/*???*/ 0 && SYMBOL_REF_FUNCTION_P (addr)) + { + output_addr_const (file, addr); + } + else + { + output_addr_const (file, addr); + } + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == CONST_INT) + offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); + else if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + gcc_assert (GET_CODE (base) == REG); + fputs (reg_names[REGNO (base)], file); + if (index == 0) + { + /* + ** ++rk quirky method to scale offset for ld/str....... + */ + fprintf (file, ",%s%d", IMMEDIATE_PREFIX, + offset/current_frame_info.stld_sz); + } + else + { + switch (GET_CODE (index)) + { + case REG: + fprintf (file, ",%s", reg_names[REGNO (index)]); + break; + case SYMBOL_REF: + fputc (',', file), output_addr_const (file, index); + break; + default: + gcc_unreachable (); + } + } + break; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: case POST_MODIFY: + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. 
*/ + gcc_unreachable (); + break; + default: + output_addr_const (file, addr); + break; + } +} + +void +epiphany_final_prescan_insn (rtx insn ATTRIBUTE_UNUSED, + rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + int i = epiphany_n_nops; + rtx pat ATTRIBUTE_UNUSED; + + while (i--) + fputs ("\tnop\n", asm_out_file); +} + + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +epiphany_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size = int_size_in_bytes (type); + + if (AGGREGATE_TYPE_P (type) + && (TYPE_MODE (type) == BLKmode || TYPE_NEEDS_CONSTRUCTING (type))) + return true; + return (size == -1 || size > 8); +} + +/* For EPIPHANY, All aggregates and arguments greater than 8 bytes are + passed by reference. */ + +static bool +epiphany_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + if (type) + { + if (AGGREGATE_TYPE_P (type) + && (mode == BLKmode || TYPE_NEEDS_CONSTRUCTING (type))) + return true; + } + return false; +} + + +static rtx +epiphany_function_value (const_tree ret_type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + + mode = TYPE_MODE (ret_type); + /* We must change the mode like PROMOTE_MODE does. + ??? PROMOTE_MODE is ignored for non-scalar types. + The set of types tested here has to be kept in sync + with the one in explow.c:promote_mode. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4 + && (TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE + || TREE_CODE (ret_type) == OFFSET_TYPE)) + mode = SImode; + return gen_rtx_REG (mode, 0); +} + +static rtx +epiphany_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, 0); +} + +static bool +epiphany_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED) +{ + return regno == 0; +} + +/* Fix up invalid option settings. */ +static void +epiphany_override_options (void) +{ + if (epiphany_stack_offset < 4) + error ("stack_offset must be at least 4"); + if (epiphany_stack_offset & 3) + error ("stack_offset must be a multiple of 4"); + epiphany_stack_offset = (epiphany_stack_offset + 3) & -4; + + /* This needs to be done at start up. It's convenient to do it here. */ + epiphany_init (); +} + +/* For a DImode load / store SET, make a SImode set for a + REG_FRAME_RELATED_EXPR note, using OFFSET to create a high or lowpart + subreg. 
*/ +static rtx +frame_subreg_note (rtx set, int offset) +{ + rtx src = simplify_gen_subreg (SImode, SET_SRC (set), DImode, offset); + rtx dst = simplify_gen_subreg (SImode, SET_DEST (set), DImode, offset); + + set = gen_rtx_SET (VOIDmode, dst ,src); + RTX_FRAME_RELATED_P (set) = 1; + return set; +} + +static rtx +frame_insn (rtx x) +{ + int i; + rtx note = NULL_RTX; + + if (GET_CODE (x) == PARALLEL) + { + rtx part = XVECEXP (x, 0, 0); + + if (GET_MODE (SET_DEST (part)) == DImode) + { + note = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (XVECLEN (x, 0) + 1)); + XVECEXP (note, 0, 0) = frame_subreg_note (part, 0); + XVECEXP (note, 0, 1) = frame_subreg_note (part, UNITS_PER_WORD); + for (i = XVECLEN (x, 0) - 1; i >= 1; i--) + { + part = copy_rtx (XVECEXP (x, 0, i)); + + if (GET_CODE (part) == SET) + RTX_FRAME_RELATED_P (part) = 1; + XVECEXP (note, 0, i + 1) = part; + } + } + else + { + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + part = XVECEXP (x, 0, i); + + if (GET_CODE (part) == SET) + RTX_FRAME_RELATED_P (part) = 1; + } + } + } + else if (GET_CODE (x) == SET && GET_MODE (SET_DEST (x)) == DImode) + note = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, frame_subreg_note (x, 0), + frame_subreg_note (x, UNITS_PER_WORD))); + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + if (note) + add_reg_note (x, REG_FRAME_RELATED_EXPR, note); + return x; +} + +static rtx +frame_move_insn (rtx to, rtx from) +{ + return frame_insn (gen_rtx_SET (VOIDmode, to, from)); +} + +/* Generate a MEM referring to a varargs argument slot. */ + +static rtx +gen_varargs_mem (enum machine_mode mode, rtx addr) +{ + rtx mem = gen_rtx_MEM (mode, addr); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, get_varargs_alias_set ()); + return mem; +} + +/* Emit instructions to save or restore registers in the range [MIN..LIMIT) . + If EPILOGUE_P is 0, save; if it is one, restore. + ADDR is the stack slot to save the first register to; subsequent + registers are written to lower addresses. + However, the order of register pairs can be reversed in order to + use double-word load-store instructions. Likewise, an unpaired single + word save slot can be skipped while double saves are carried out, and + reused when a single register is to be saved. */ + +static void +epiphany_emit_save_restore (int min, int limit, rtx addr, int epilogue_p) +{ + int i; + int stack_offset + = current_frame_info.first_slot >= 0 ? epiphany_stack_offset : 0; + rtx skipped_mem = NULL_RTX; + int last_saved = limit - 1; + + if (!optimize) + while (last_saved >= 0 + && !TEST_HARD_REG_BIT (current_frame_info.gmask, last_saved)) + last_saved--; + for (i = 0; i < limit; i++) + { + enum machine_mode mode = word_mode; + rtx mem, reg; + int n = i; + rtx (*gen_mem) (enum machine_mode, rtx) = gen_frame_mem; + + /* Make sure we push the arguments in the right order. 
*/ + if (n < MAX_EPIPHANY_PARM_REGS && crtl->args.pretend_args_size) + { + n = MAX_EPIPHANY_PARM_REGS - 1 - n; + gen_mem = gen_varargs_mem; + } + if (stack_offset == current_frame_info.first_slot_size + && current_frame_info.first_slot >= 0) + { + if (current_frame_info.first_slot_size > UNITS_PER_WORD) + { + mode = DImode; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) UNITS_PER_WORD); + } + if (i-- < min || !epilogue_p) + goto next_slot; + n = current_frame_info.first_slot; + gen_mem = gen_frame_mem; + } + else if (n == UNKNOWN_REGNUM + && stack_offset > current_frame_info.first_slot_size) + { + i--; + goto next_slot; + } + else if (!TEST_HARD_REG_BIT (current_frame_info.gmask, n)) + continue; + else if (i < min) + goto next_slot; + + /* Check for a register pair to save. */ + if (n == i + && (n >= MAX_EPIPHANY_PARM_REGS || crtl->args.pretend_args_size == 0) + && (n & 1) == 0 && n+1 < limit + && TEST_HARD_REG_BIT (current_frame_info.gmask, n+1)) + { + /* If it fits in the current stack slot pair, place it there. */ + if (GET_CODE (addr) == PLUS && (stack_offset & 7) == 0 + && stack_offset != 2 * UNITS_PER_WORD + && (current_frame_info.last_slot < 0 + || INTVAL (XEXP (addr, 1)) != UNITS_PER_WORD) + && (n+1 != last_saved || !skipped_mem)) + { + mode = DImode; + i++; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) UNITS_PER_WORD); + } + /* If it fits in the following stack slot pair, that's fine, too. */ + else if (GET_CODE (addr) == PLUS && (stack_offset & 7) == 4 + && stack_offset != 2 * UNITS_PER_WORD + && stack_offset != 3 * UNITS_PER_WORD + && (current_frame_info.last_slot < 0 + || INTVAL (XEXP (addr, 1)) != 2 * UNITS_PER_WORD) + && n + 1 != last_saved) + { + gcc_assert (!skipped_mem); + stack_offset -= GET_MODE_SIZE (mode); + skipped_mem = gen_mem (mode, addr); + mode = DImode; + i++; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + } + } + reg = gen_rtx_REG (mode, n); + if (mode != DImode && skipped_mem) + mem = skipped_mem; + else + mem = gen_mem (mode, addr); + + /* If we are loading / storing LR, note the offset that + gen_reload_insi_ra requires. Since GPR_LR is even, + we only need to test n, even if mode is DImode. */ + gcc_assert ((GPR_LR & 1) == 0); + if (n == GPR_LR) + { + long lr_slot_offset = 0; + rtx m_addr = XEXP (mem, 0); + + if (GET_CODE (m_addr) == PLUS) + lr_slot_offset = INTVAL (XEXP (m_addr, 1)); + if (frame_pointer_needed) + lr_slot_offset += (current_frame_info.first_slot_offset + - current_frame_info.total_size); + if (MACHINE_FUNCTION (cfun)->lr_slot_known) + gcc_assert (MACHINE_FUNCTION (cfun)->lr_slot_offset + == lr_slot_offset); + MACHINE_FUNCTION (cfun)->lr_slot_offset = lr_slot_offset; + MACHINE_FUNCTION (cfun)->lr_slot_known = 1; + } + + if (!epilogue_p) + frame_move_insn (mem, reg); + else if (n >= MAX_EPIPHANY_PARM_REGS || !crtl->args.pretend_args_size) + emit_move_insn (reg, mem); + if (mem == skipped_mem) + { + skipped_mem = NULL_RTX; + continue; + } + next_slot: + addr = plus_constant (Pmode, addr, -(HOST_WIDE_INT) UNITS_PER_WORD); + stack_offset -= GET_MODE_SIZE (mode); + } +} + +void +epiphany_expand_prologue (void) +{ + int interrupt_p; + enum epiphany_function_type fn_type; + rtx addr, mem, off, reg; + + if (!current_frame_info.initialized) + epiphany_compute_frame_size (get_frame_size ()); + + /* It is debatable if we should adjust this by epiphany_stack_offset. 
*/ + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + if (interrupt_p) + { + addr = plus_constant (Pmode, stack_pointer_rtx, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + if (!lookup_attribute ("forwarder_section", + DECL_ATTRIBUTES (current_function_decl)) + || !epiphany_is_long_call_p (XEXP (DECL_RTL (current_function_decl), + 0))) + frame_move_insn (gen_frame_mem (DImode, addr), + gen_rtx_REG (DImode, GPR_0)); + frame_move_insn (gen_rtx_REG (SImode, GPR_0), + gen_rtx_REG (word_mode, STATUS_REGNUM)); + frame_move_insn (gen_rtx_REG (SImode, GPR_1), + gen_rtx_REG (word_mode, IRET_REGNUM)); + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + off = GEN_INT (-current_frame_info.first_slot_offset); + frame_insn (gen_stack_adjust_add (off, mem)); + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gie ()); + addr = plus_constant (Pmode, stack_pointer_rtx, + current_frame_info.first_slot_offset + - (HOST_WIDE_INT) 3 * UNITS_PER_WORD); + } + else + { + addr = plus_constant (Pmode, stack_pointer_rtx, + epiphany_stack_offset + - (HOST_WIDE_INT) UNITS_PER_WORD); + epiphany_emit_save_restore (0, current_frame_info.small_threshold, + addr, 0); + /* Allocate register save area; for small to medium size frames, + allocate the entire frame; this is joint with one register save. */ + if (current_frame_info.first_slot >= 0) + { + enum machine_mode mode + = (current_frame_info.first_slot_size == UNITS_PER_WORD + ? word_mode : DImode); + + off = GEN_INT (-current_frame_info.first_slot_offset); + mem = gen_frame_mem (BLKmode, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + frame_insn (gen_stack_adjust_str + (gen_frame_mem (mode, stack_pointer_rtx), + gen_rtx_REG (mode, current_frame_info.first_slot), + off, mem)); + addr = plus_constant (Pmode, addr, + current_frame_info.first_slot_offset); + } + } + epiphany_emit_save_restore (current_frame_info.small_threshold, + FIRST_PSEUDO_REGISTER, addr, 0); + if (current_frame_info.need_fp) + frame_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* For large frames, allocate bulk of frame. This is usually joint with one + register save. */ + if (current_frame_info.last_slot >= 0) + { + rtx ip, mem2, insn, note; + + gcc_assert (current_frame_info.last_slot != GPR_FP + || (!current_frame_info.need_fp + && current_frame_info.first_slot < 0)); + off = GEN_INT (-current_frame_info.last_slot_offset); + mem = gen_frame_mem (BLKmode, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + ip = gen_rtx_REG (Pmode, GPR_IP); + frame_move_insn (ip, off); + reg = gen_rtx_REG (word_mode, current_frame_info.last_slot), + mem2 = gen_frame_mem (word_mode, stack_pointer_rtx), + insn = frame_insn (gen_stack_adjust_str (mem2, reg, ip, mem)); + /* Instruction scheduling can separate the instruction setting IP from + INSN so that dwarf2out_frame_debug_expr becomes confused what the + temporary register is. Example: _gcov.o */ + note = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + note = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, gen_rtx_SET (VOIDmode, mem2, reg), + note)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, note); + } + /* If there is only one or no register to save, yet we have a large frame, + use an add. 
*/ + else if (current_frame_info.last_slot_offset) + { + mem = gen_frame_mem (BLKmode, + plus_constant (Pmode, stack_pointer_rtx, + current_frame_info.last_slot_offset)); + off = GEN_INT (-current_frame_info.last_slot_offset); + if (!SIMM11 (INTVAL (off))) + { + reg = gen_rtx_REG (Pmode, GPR_IP); + frame_move_insn (reg, off); + off = reg; + } + frame_insn (gen_stack_adjust_add (off, mem)); + } +} + +void +epiphany_expand_epilogue (int sibcall_p) +{ + int interrupt_p; + enum epiphany_function_type fn_type; + rtx mem, addr, reg, off; + HOST_WIDE_INT restore_offset; + + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + /* For variable frames, deallocate bulk of frame. */ + if (current_frame_info.need_fp) + { + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + emit_insn (gen_stack_adjust_mov (mem)); + } + /* Else for large static frames, deallocate bulk of frame. */ + else if (current_frame_info.last_slot_offset) + { + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + reg = gen_rtx_REG (Pmode, GPR_IP); + emit_move_insn (reg, GEN_INT (current_frame_info.last_slot_offset)); + emit_insn (gen_stack_adjust_add (reg, mem)); + } + restore_offset = (interrupt_p + ? - 3 * UNITS_PER_WORD + : epiphany_stack_offset - (HOST_WIDE_INT) UNITS_PER_WORD); + addr = plus_constant (Pmode, stack_pointer_rtx, + (current_frame_info.first_slot_offset + + restore_offset)); + epiphany_emit_save_restore (current_frame_info.small_threshold, + FIRST_PSEUDO_REGISTER, addr, 1); + + if (interrupt_p && !epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gid ()); + + off = GEN_INT (current_frame_info.first_slot_offset); + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + /* For large / variable size frames, deallocating the register save area is + joint with one register restore; for medium size frames, we use a + dummy post-increment load to deallocate the whole frame. */ + if (!SIMM11 (INTVAL (off)) || current_frame_info.last_slot >= 0) + { + emit_insn (gen_stack_adjust_ldr + (gen_rtx_REG (word_mode, + (current_frame_info.last_slot >= 0 + ? current_frame_info.last_slot : GPR_IP)), + gen_frame_mem (word_mode, stack_pointer_rtx), + off, + mem)); + } + /* While for small frames, we deallocate the entire frame with one add.
*/ + else if (INTVAL (off)) + { + emit_insn (gen_stack_adjust_add (off, mem)); + } + if (interrupt_p) + { + emit_move_insn (gen_rtx_REG (word_mode, STATUS_REGNUM), + gen_rtx_REG (SImode, GPR_0)); + emit_move_insn (gen_rtx_REG (word_mode, IRET_REGNUM), + gen_rtx_REG (SImode, GPR_1)); + addr = plus_constant (Pmode, stack_pointer_rtx, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + emit_move_insn (gen_rtx_REG (DImode, GPR_0), + gen_frame_mem (DImode, addr)); + } + addr = plus_constant (Pmode, stack_pointer_rtx, + epiphany_stack_offset - (HOST_WIDE_INT) UNITS_PER_WORD); + epiphany_emit_save_restore (0, current_frame_info.small_threshold, addr, 1); + if (!sibcall_p) + { + if (interrupt_p) + emit_jump_insn (gen_return_internal_interrupt()); + else + emit_jump_insn (gen_return_i ()); + } +} + +int +epiphany_initial_elimination_offset (int from, int to) +{ + epiphany_compute_frame_size (get_frame_size ()); + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return current_frame_info.total_size - current_frame_info.reg_size; + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return current_frame_info.first_slot_offset - current_frame_info.reg_size; + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return (current_frame_info.total_size + - ((current_frame_info.pretend_size + 4) & -8)); + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return (current_frame_info.first_slot_offset + - ((current_frame_info.pretend_size + 4) & -8)); + gcc_unreachable (); +} + +bool +epiphany_regno_rename_ok (unsigned, unsigned dst) +{ + enum epiphany_function_type fn_type; + + fn_type = epiphany_compute_function_type (current_function_decl); + if (!EPIPHANY_INTERRUPT_P (fn_type)) + return true; + if (df_regs_ever_live_p (dst)) + return true; + return false; +} + +static int +epiphany_issue_rate (void) +{ + return 2; +} + +/* Function to update the integer COST + based on the relationship between INSN that is dependent on + DEP_INSN through the dependence LINK. The default is to make no + adjustment to COST. This can be used for example to specify to + the scheduler that an output- or anti-dependence does not incur + the same cost as a data-dependence. The return value should be + the new value for COST. */ +static int +epiphany_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + if (REG_NOTE_KIND (link) == 0) + { + rtx dep_set; + + if (recog_memoized (insn) < 0 + || recog_memoized (dep_insn) < 0) + return cost; + + dep_set = single_set (dep_insn); + + /* The latency that we specify in the scheduling description refers + to the actual output, not to an auto-increment register; for that, + the latency is one. 
*/ + if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) + { + rtx set = single_set (insn); + + if (set + && !reg_overlap_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) + && (!MEM_P (SET_DEST (set)) + || !reg_overlap_mentioned_p (SET_DEST (dep_set), + XEXP (SET_DEST (set), 0)))) + cost = 1; + } + } + return cost; +} + +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +#define RTX_OK_FOR_BASE_P(X) \ + (REG_P (X) && REG_OK_FOR_BASE_P (X)) + +#define RTX_OK_FOR_INDEX_P(MODE, X) \ + ((GET_MODE_CLASS (MODE) != MODE_VECTOR_INT \ + || epiphany_vect_align >= GET_MODE_SIZE (MODE)) \ + && (REG_P (X) && REG_OK_FOR_INDEX_P (X))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \ + && (RTX_OK_FOR_INDEX_P (MODE, XEXP (X, 1)) \ + || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) + +static bool +epiphany_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ +#define REG_OK_FOR_BASE_P(X) \ + (strict ? GPR_P (REGNO (X)) : GPR_AP_OR_PSEUDO_P (REGNO (X))) + if (RTX_OK_FOR_BASE_P (x)) + return true; + if (RTX_FRAME_OFFSET_P (x)) + return true; + if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x)) + return true; + /* If this is a misaligned stack access, don't force it to reg+index. */ + if (GET_MODE_SIZE (mode) == 8 + && GET_CODE (x) == PLUS && XEXP (x, 0) == stack_pointer_rtx + /* Decomposed to SImode; GET_MODE_SIZE (SImode) == 4 */ + && !(INTVAL (XEXP (x, 1)) & 3) + && INTVAL (XEXP (x, 1)) >= -2047 * 4 + && INTVAL (XEXP (x, 1)) <= 2046 * 4) + return true; + if (TARGET_POST_INC + && (GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && RTX_OK_FOR_BASE_P (XEXP ((x), 0))) + return true; + if ((TARGET_POST_MODIFY || reload_completed) + && GET_CODE (x) == POST_MODIFY + && GET_CODE (XEXP ((x), 1)) == PLUS + && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP ((x), 1), 0)) + && LEGITIMATE_OFFSET_ADDRESS_P (mode, XEXP ((x), 1))) + return true; + if (mode == BLKmode) + return true; + return false; +} + +static reg_class_t +epiphany_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + enum machine_mode mode ATTRIBUTE_UNUSED, + secondary_reload_info *sri) +{ + /* This could give more reload inheritance, but we are missing some + reload infrastructure. */ + if (0) + if (in_p && GET_CODE (x) == UNSPEC + && satisfies_constraint_Sra (x) && !satisfies_constraint_Rra (x)) + { + gcc_assert (rclass == GENERAL_REGS); + sri->icode = CODE_FOR_reload_insi_ra; + return NO_REGS; + } + return NO_REGS; +} + +bool +epiphany_is_long_call_p (rtx x) +{ + tree decl = SYMBOL_REF_DECL (x); + bool ret_val = !TARGET_SHORT_CALLS; + tree attrs; + + /* ??? Is it safe to default to ret_val if decl is NULL? We should + probably encode information via encode_section_info, and also + have (an) option(s) to take SYMBOL_FLAG_LOCAL and/or SYMBOL_FLAG_EXTERNAL + into account. */ + if (decl) + { + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("long_call", attrs)) + ret_val = true; + else if (lookup_attribute ("short_call", attrs)) + ret_val = false; + } + return ret_val; +} + +bool +epiphany_small16 (rtx x) +{ + rtx base = x; + rtx offs ATTRIBUTE_UNUSED = const0_rtx; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + base = XEXP (XEXP (x, 0), 0); + offs = XEXP (XEXP (x, 0), 1); + } + if (GET_CODE (base) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (base) + && epiphany_is_long_call_p (base)) + return false; + return TARGET_SMALL16 != 0; +} + +/* Return nonzero if it is ok to make a tail-call to DECL. 
*/ +static bool +epiphany_function_ok_for_sibcall (tree decl, tree exp) +{ + bool cfun_interrupt_p, call_interrupt_p; + + cfun_interrupt_p = EPIPHANY_INTERRUPT_P (epiphany_compute_function_type + (current_function_decl)); + if (decl) + call_interrupt_p = EPIPHANY_INTERRUPT_P (epiphany_compute_function_type (decl)); + else + { + tree fn_type = TREE_TYPE (CALL_EXPR_FN (exp)); + + gcc_assert (POINTER_TYPE_P (fn_type)); + fn_type = TREE_TYPE (fn_type); + gcc_assert (TREE_CODE (fn_type) == FUNCTION_TYPE + || TREE_CODE (fn_type) == METHOD_TYPE); + call_interrupt_p + = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (fn_type)) != NULL; + } + + /* Don't tailcall from or to an ISR routine - although we could in + principle tailcall from one ISR routine to another, we'd need to + handle this in sibcall_epilogue to make it work. */ + if (cfun_interrupt_p || call_interrupt_p) + return false; + + /* Everything else is ok. */ + return true; +} + +/* T is a function declaration or the MEM_EXPR of a MEM passed to a call + expander. + Return true iff the type of T has the uninterruptible attribute. + If T is NULL, return false. */ +bool +epiphany_uninterruptible_p (tree t) +{ + tree attrs; + + if (t) + { + attrs = TYPE_ATTRIBUTES (TREE_TYPE (t)); + if (lookup_attribute ("disinterrupt", attrs)) + return true; + } + return false; +} + +bool +epiphany_call_uninterruptible_p (rtx mem) +{ + rtx addr = XEXP (mem, 0); + tree t = NULL_TREE; + + if (GET_CODE (addr) == SYMBOL_REF) + t = SYMBOL_REF_DECL (addr); + if (!t) + t = MEM_EXPR (mem); + return epiphany_uninterruptible_p (t); +} + +static enum machine_mode +epiphany_promote_function_mode (const_tree type, enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree funtype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + int dummy; + + return promote_mode (type, mode, &dummy); +} + +static void +epiphany_conditional_register_usage (void) +{ + int i; + + if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + if (TARGET_HALF_REG_FILE) + { + for (i = 32; i <= 63; i++) + { + fixed_regs[i] = 1; + call_used_regs[i] = 1; + } + } + if (epiphany_m1reg >= 0) + { + fixed_regs[epiphany_m1reg] = 1; + call_used_regs[epiphany_m1reg] = 1; + } + if (!TARGET_PREFER_SHORT_INSN_REGS) + CLEAR_HARD_REG_SET (reg_class_contents[SHORT_INSN_REGS]); + COPY_HARD_REG_SET (reg_class_contents[SIBCALL_REGS], + reg_class_contents[GENERAL_REGS]); + /* It would be simpler and quicker if we could just use + AND_COMPL_HARD_REG_SET, alas, call_used_reg_set is yet uninitialized; + it is set up later by our caller. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_regs[i]) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], i); +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ +/* On the EPIPHANY the first MAX_EPIPHANY_PARM_REGS args are normally in + registers and the rest are pushed. 
*/ +static rtx +epiphany_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (cum_v); + + if (PASS_IN_REG_P (cum, mode, type)) + return gen_rtx_REG (mode, ROUND_ADVANCE_CUM (cum, mode, type)); + return 0; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ +static void +epiphany_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + *cum = ROUND_ADVANCE_CUM (*cum, mode, type) + ROUND_ADVANCE_ARG (mode, type); +} + +/* Nested function support. + An epiphany trampoline looks like this: + mov r16,%low(fnaddr) + movt r16,%high(fnaddr) + mov ip,%low(cxt) + movt ip,%high(cxt) + jr r16 */ + +#define EPIPHANY_LOW_RTX(X) \ + (gen_rtx_IOR (SImode, \ + gen_rtx_ASHIFT (SImode, \ + gen_rtx_AND (SImode, (X), GEN_INT (0xff)), GEN_INT (5)), \ + gen_rtx_ASHIFT (SImode, \ + gen_rtx_AND (SImode, (X), GEN_INT (0xff00)), GEN_INT (12)))) +#define EPIPHANY_HIGH_RTX(X) \ + EPIPHANY_LOW_RTX (gen_rtx_LSHIFTRT (SImode, (X), GEN_INT (16))) + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ +static void +epiphany_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); + + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 0)), + gen_rtx_IOR (SImode, GEN_INT (0x4002000b), + EPIPHANY_LOW_RTX (fnaddr))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 4)), + gen_rtx_IOR (SImode, GEN_INT (0x5002000b), + EPIPHANY_HIGH_RTX (fnaddr))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 8)), + gen_rtx_IOR (SImode, GEN_INT (0x2002800b), + EPIPHANY_LOW_RTX (cxt))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 12)), + gen_rtx_IOR (SImode, GEN_INT (0x3002800b), + EPIPHANY_HIGH_RTX (cxt))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 16)), + GEN_INT (0x0802014f)); +} + +bool +epiphany_optimize_mode_switching (int entity) +{ + if (MACHINE_FUNCTION (cfun)->sw_entities_processed & (1 << entity)) + return false; + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + case EPIPHANY_MSW_ENTITY_OR: + case EPIPHANY_MSW_ENTITY_CONFIG: + return true; + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + return optimize > 0; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + return MACHINE_FUNCTION (cfun)->unknown_mode_uses != 0; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + return (MACHINE_FUNCTION (cfun)->sw_entities_processed + & (1 << EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN)) != 0; + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + return optimize == 0 || current_pass == pass_mode_switch_use; + } + gcc_unreachable (); +} + +int +epiphany_mode_priority_to_mode (int entity, unsigned priority) +{ + if (entity == EPIPHANY_MSW_ENTITY_AND || entity == EPIPHANY_MSW_ENTITY_OR + || entity== EPIPHANY_MSW_ENTITY_CONFIG) + return priority; + if (priority > 3) + switch (priority) + { + case 4: return FP_MODE_ROUND_UNKNOWN; + case 5: return FP_MODE_NONE; + default: gcc_unreachable (); + } + switch ((enum attr_fp_mode) epiphany_normal_fp_mode) + { + case FP_MODE_INT: + 
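+ /* In this FP_MODE_INT case, priority 0 selects FP_MODE_INT itself, priority 1 the configured normal rounding mode, priority 2 the opposite rounding mode, and priority 3 FP_MODE_CALLER; priorities 4 and 5 were already mapped to FP_MODE_ROUND_UNKNOWN and FP_MODE_NONE above. */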
switch (priority) + { + case 0: return FP_MODE_INT; + case 1: return epiphany_normal_fp_rounding; + case 2: return (epiphany_normal_fp_rounding == FP_MODE_ROUND_NEAREST + ? FP_MODE_ROUND_TRUNC : FP_MODE_ROUND_NEAREST); + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_NEAREST: + case FP_MODE_CALLER: + switch (priority) + { + case 0: return FP_MODE_ROUND_NEAREST; + case 1: return FP_MODE_ROUND_TRUNC; + case 2: return FP_MODE_INT; + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_TRUNC: + switch (priority) + { + case 0: return FP_MODE_ROUND_TRUNC; + case 1: return FP_MODE_ROUND_NEAREST; + case 2: return FP_MODE_INT; + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_UNKNOWN: + case FP_MODE_NONE: + gcc_unreachable (); + } + gcc_unreachable (); +} + +int +epiphany_mode_needed (int entity, rtx insn) +{ + enum attr_fp_mode mode; + + if (recog_memoized (insn) < 0) + { + if (entity == EPIPHANY_MSW_ENTITY_AND + || entity == EPIPHANY_MSW_ENTITY_OR + || entity == EPIPHANY_MSW_ENTITY_CONFIG) + return 2; + return FP_MODE_NONE; + } + mode = get_attr_fp_mode (insn); + + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + return mode != FP_MODE_NONE && mode != FP_MODE_INT ? 1 : 2; + case EPIPHANY_MSW_ENTITY_OR: + return mode == FP_MODE_INT ? 1 : 2; + case EPIPHANY_MSW_ENTITY_CONFIG: + /* We must know/save config before we set it to something else. + Where we need the original value, we are fine with having it + just unchanged from the function start. + Because of the nature of the mode switching optimization, + a restore will be dominated by a clobber. */ + if (mode != FP_MODE_NONE && mode != FP_MODE_CALLER) + return 1; + /* A cpecial case are abnormal edges, which are deemed to clobber + the mode as well. We need to pin this effect on a actually + dominating insn, and one where the frame can be accessed, too, in + case the pseudo used to save CONFIG doesn't get a hard register. */ + if (CALL_P (insn) && find_reg_note (insn, REG_EH_REGION, NULL_RTX)) + return 1; + return 2; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) + mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); + /* Fall through. */ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + if (mode == FP_MODE_ROUND_UNKNOWN) + { + MACHINE_FUNCTION (cfun)->unknown_mode_uses++; + return FP_MODE_NONE; + } + return mode; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + if (mode == FP_MODE_ROUND_NEAREST || mode == FP_MODE_ROUND_TRUNC) + return FP_MODE_ROUND_UNKNOWN; + return mode; + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + if (mode == FP_MODE_ROUND_UNKNOWN) + return epiphany_normal_fp_rounding; + return mode; + default: + gcc_unreachable (); + } +} + +int +epiphany_mode_entry_exit (int entity, bool exit) +{ + int normal_mode = epiphany_normal_fp_mode ; + + MACHINE_FUNCTION (cfun)->sw_entities_processed |= (1 << entity); + if (epiphany_is_interrupt_p (current_function_decl)) + normal_mode = FP_MODE_CALLER; + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + if (exit) + return normal_mode != FP_MODE_INT ? 1 : 2; + return 0; + case EPIPHANY_MSW_ENTITY_OR: + if (exit) + return normal_mode == FP_MODE_INT ? 1 : 2; + return 0; + case EPIPHANY_MSW_ENTITY_CONFIG: + if (exit) + return 2; + return normal_mode == FP_MODE_CALLER ? 0 : 1; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + if (normal_mode == FP_MODE_ROUND_NEAREST + || normal_mode == FP_MODE_ROUND_TRUNC) + return FP_MODE_ROUND_UNKNOWN; + /* Fall through. 
*/ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + return normal_mode; + default: + gcc_unreachable (); + } +} + +int +epiphany_mode_after (int entity, int last_mode, rtx insn) +{ + /* We have too few call-saved registers to hope to keep the masks across + calls. */ + if (entity == EPIPHANY_MSW_ENTITY_AND || entity == EPIPHANY_MSW_ENTITY_OR) + { + if (CALL_P (insn)) + return 0; + return last_mode; + } + /* If there is an abnormal edge, we don't want the config register to + be 'saved' again at the destination. + The frame pointer adjustment is inside a PARALLEL because of the + flags clobber. */ + if (entity == EPIPHANY_MSW_ENTITY_CONFIG && NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == SET + && SET_DEST (XVECEXP (PATTERN (insn), 0, 0)) == frame_pointer_rtx) + { + gcc_assert (cfun->has_nonlocal_label); + return 1; + } + if (recog_memoized (insn) < 0) + return last_mode; + if (get_attr_fp_mode (insn) == FP_MODE_ROUND_UNKNOWN + && last_mode != FP_MODE_ROUND_NEAREST && last_mode != FP_MODE_ROUND_TRUNC) + { + if (entity == EPIPHANY_MSW_ENTITY_NEAREST) + return FP_MODE_ROUND_NEAREST; + if (entity == EPIPHANY_MSW_ENTITY_TRUNC) + return FP_MODE_ROUND_TRUNC; + } + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) + { + rtx src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int fp_mode; + + if (REG_P (src)) + return FP_MODE_CALLER; + fp_mode = INTVAL (XVECEXP (XEXP (src, 0), 0, 0)); + if (entity == EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN + && (fp_mode == FP_MODE_ROUND_NEAREST + || fp_mode == EPIPHANY_MSW_ENTITY_TRUNC)) + return FP_MODE_ROUND_UNKNOWN; + return fp_mode; + } + return last_mode; +} + +void +emit_set_fp_mode (int entity, int mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED) +{ + rtx save_cc, cc_reg, mask, src, src2; + enum attr_fp_mode fp_mode; + + if (!MACHINE_FUNCTION (cfun)->and_mask) + { + MACHINE_FUNCTION (cfun)->and_mask = gen_reg_rtx (SImode); + MACHINE_FUNCTION (cfun)->or_mask = gen_reg_rtx (SImode); + } + if (entity == EPIPHANY_MSW_ENTITY_AND) + { + gcc_assert (mode >= 0 && mode <= 2); + if (mode == 1) + emit_move_insn (MACHINE_FUNCTION (cfun)->and_mask, + gen_int_mode (0xfff1fffe, SImode)); + return; + } + else if (entity == EPIPHANY_MSW_ENTITY_OR) + { + gcc_assert (mode >= 0 && mode <= 2); + if (mode == 1) + emit_move_insn (MACHINE_FUNCTION (cfun)->or_mask, GEN_INT(0x00080000)); + return; + } + else if (entity == EPIPHANY_MSW_ENTITY_CONFIG) + { + /* Mode switching optimization is done after emit_initial_value_sets, + so we have to take care of CONFIG_REGNUM here. */ + gcc_assert (mode >= 0 && mode <= 2); + rtx save = get_hard_reg_initial_val (SImode, CONFIG_REGNUM); + if (mode == 1) + emit_insn (gen_save_config (save)); + return; + } + fp_mode = (enum attr_fp_mode) mode; + src = NULL_RTX; + + switch (fp_mode) + { + case FP_MODE_CALLER: + /* The EPIPHANY_MSW_ENTITY_CONFIG processing must come later + so that the config save gets inserted before the first use. 
*/ + gcc_assert (entity > EPIPHANY_MSW_ENTITY_CONFIG); + src = get_hard_reg_initial_val (SImode, CONFIG_REGNUM); + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_UNKNOWN: + MACHINE_FUNCTION (cfun)->unknown_mode_sets++; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_NEAREST: + if (entity == EPIPHANY_MSW_ENTITY_TRUNC) + return; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_TRUNC: + if (entity == EPIPHANY_MSW_ENTITY_NEAREST) + return; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_INT: + mask = MACHINE_FUNCTION (cfun)->or_mask; + break; + case FP_MODE_NONE: + default: + gcc_unreachable (); + } + save_cc = gen_reg_rtx (CCmode); + cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + emit_move_insn (save_cc, cc_reg); + mask = force_reg (SImode, mask); + if (!src) + { + rtvec v = gen_rtvec (1, GEN_INT (fp_mode)); + + src = gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, v, UNSPEC_FP_MODE)); + } + if (entity == EPIPHANY_MSW_ENTITY_ROUND_KNOWN + || entity == EPIPHANY_MSW_ENTITY_FPU_OMNIBUS) + src2 = copy_rtx (src); + else + { + rtvec v = gen_rtvec (1, GEN_INT (FP_MODE_ROUND_UNKNOWN)); + + src2 = gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, v, UNSPEC_FP_MODE)); + } + emit_insn (gen_set_fp_mode (src, src2, mask)); + emit_move_insn (cc_reg, save_cc); +} + +void +epiphany_expand_set_fp_mode (rtx *operands) +{ + rtx ctrl = gen_rtx_REG (SImode, CONFIG_REGNUM); + rtx src = operands[0]; + rtx mask_reg = operands[2]; + rtx scratch = operands[3]; + enum attr_fp_mode fp_mode; + + + gcc_assert (rtx_equal_p (src, operands[1]) + /* Sometimes reload gets silly and reloads the same pseudo + into different registers. */ + || (REG_P (src) && REG_P (operands[1]))); + + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gid ()); + emit_move_insn (scratch, ctrl); + + if (GET_CODE (src) == REG) + { + /* FP_MODE_CALLER */ + emit_insn (gen_xorsi3 (scratch, scratch, src)); + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + emit_insn (gen_xorsi3 (scratch, scratch, src)); + } + else + { + gcc_assert (GET_CODE (src) == CONST); + src = XEXP (src, 0); + fp_mode = (enum attr_fp_mode) INTVAL (XVECEXP (src, 0, 0)); + switch (fp_mode) + { + case FP_MODE_ROUND_NEAREST: + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + break; + case FP_MODE_ROUND_TRUNC: + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + emit_insn (gen_add2_insn (scratch, const1_rtx)); + break; + case FP_MODE_INT: + emit_insn (gen_iorsi3 (scratch, scratch, mask_reg)); + break; + case FP_MODE_CALLER: + case FP_MODE_ROUND_UNKNOWN: + case FP_MODE_NONE: + gcc_unreachable (); + } + } + emit_move_insn (ctrl, scratch); + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gie ()); +} + +void +epiphany_insert_mode_switch_use (rtx insn, + int entity ATTRIBUTE_UNUSED, + int mode ATTRIBUTE_UNUSED) +{ + rtx pat = PATTERN (insn); + rtvec v; + int len, i; + rtx near = gen_rtx_REG (SImode, FP_NEAREST_REGNUM); + rtx trunc = gen_rtx_REG (SImode, FP_TRUNCATE_REGNUM); + + if (entity != EPIPHANY_MSW_ENTITY_FPU_OMNIBUS) + return; + switch ((enum attr_fp_mode) get_attr_fp_mode (insn)) + { + case FP_MODE_ROUND_NEAREST: + near = gen_rtx_USE (VOIDmode, near); + trunc = gen_rtx_CLOBBER (VOIDmode, trunc); + break; + case FP_MODE_ROUND_TRUNC: + near = gen_rtx_CLOBBER (VOIDmode, near); + trunc = gen_rtx_USE (VOIDmode, trunc); + break; + case FP_MODE_ROUND_UNKNOWN: + near = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FP_ANYFP_REGNUM)); + trunc 
= copy_rtx (near); + /* Fall through. */ + case FP_MODE_INT: + case FP_MODE_CALLER: + near = gen_rtx_USE (VOIDmode, near); + trunc = gen_rtx_USE (VOIDmode, trunc); + break; + case FP_MODE_NONE: + gcc_unreachable (); + } + gcc_assert (GET_CODE (pat) == PARALLEL); + len = XVECLEN (pat, 0); + v = rtvec_alloc (len + 2); + for (i = 0; i < len; i++) + RTVEC_ELT (v, i) = XVECEXP (pat, 0, i); + RTVEC_ELT (v, len) = near; + RTVEC_ELT (v, len + 1) = trunc; + pat = gen_rtx_PARALLEL (VOIDmode, v); + PATTERN (insn) = pat; + MACHINE_FUNCTION (cfun)->control_use_inserted = true; +} + +bool +epiphany_epilogue_uses (int regno) +{ + if (regno == GPR_LR) + return true; + if (reload_completed && epiphany_is_interrupt_p (current_function_decl)) + { + if (fixed_regs[regno] + && regno != STATUS_REGNUM && regno != IRET_REGNUM + && regno != FP_NEAREST_REGNUM && regno != FP_TRUNCATE_REGNUM) + return false; + return true; + } + if (regno == FP_NEAREST_REGNUM + && epiphany_normal_fp_mode != FP_MODE_ROUND_TRUNC) + return true; + if (regno == FP_TRUNCATE_REGNUM + && epiphany_normal_fp_mode != FP_MODE_ROUND_NEAREST) + return true; + return false; +} + +static unsigned int +epiphany_min_divisions_for_recip_mul (enum machine_mode mode) +{ + if (flag_reciprocal_math && mode == SFmode) + /* We'll expand into a multiply-by-reciprocal anyway, so we might a well do + it already at the tree level and expose it to further optimizations. */ + return 1; + return default_min_divisions_for_recip_mul (mode); +} + +static enum machine_mode +epiphany_preferred_simd_mode (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return TARGET_VECT_DOUBLE ? DImode : SImode; +} + +static bool +epiphany_vector_mode_supported_p (enum machine_mode mode) +{ + if (mode == V2SFmode) + return true; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)) + return true; + return false; +} + +static bool +epiphany_vector_alignment_reachable (const_tree type, bool is_packed) +{ + /* Vectors which aren't in packed structures will not be less aligned than + the natural alignment of their element type, so this is safe. */ + if (TYPE_ALIGN_UNIT (type) == 4) + return !is_packed; + + return default_builtin_vector_alignment_reachable (type, is_packed); +} + +static bool +epiphany_support_vector_misalignment (enum machine_mode mode, const_tree type, + int misalignment, bool is_packed) +{ + if (GET_MODE_SIZE (mode) == 8 && misalignment % 4 == 0) + return true; + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +/* STRUCTURE_SIZE_BOUNDARY seems a bit crude in how it enlarges small + structs. Make structs double-word-aligned it they are a double word or + (potentially) larger; failing that, do the same for a size of 32 bits. 
*/ +unsigned +epiphany_special_round_type_align (tree type, unsigned computed, + unsigned specified) +{ + unsigned align = MAX (computed, specified); + tree field; + HOST_WIDE_INT total, max; + unsigned try_align = FASTEST_ALIGNMENT; + + if (maximum_field_alignment && try_align > maximum_field_alignment) + try_align = maximum_field_alignment; + if (align >= try_align) + return align; + for (max = 0, field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + tree offset, size; + + if (TREE_CODE (field) != FIELD_DECL + || TREE_TYPE (field) == error_mark_node) + continue; + offset = bit_position (field); + size = DECL_SIZE (field); + if (!tree_fits_uhwi_p (offset) || !tree_fits_uhwi_p (size) + || tree_to_uhwi (offset) >= try_align + || tree_to_uhwi (size) >= try_align) + return try_align; + total = tree_to_uhwi (offset) + tree_to_uhwi (size); + if (total > max) + max = total; + } + if (max >= (HOST_WIDE_INT) try_align) + align = try_align; + else if (try_align > 32 && max >= 32) + align = max > 32 ? 64 : 32; + return align; +} + +/* Upping the alignment of arrays in structs is not only a performance + enhancement, it also helps preserve assumptions about how + arrays-at-the-end-of-structs work, like for struct gcov_fn_info in + libgcov.c . */ +unsigned +epiphany_adjust_field_align (tree field, unsigned computed) +{ + if (computed == 32 + && TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE) + { + tree elmsz = TYPE_SIZE (TREE_TYPE (TREE_TYPE (field))); + + if (!tree_fits_uhwi_p (elmsz) || tree_to_uhwi (elmsz) >= 32) + return 64; + } + return computed; +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +epiphany_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + int this_regno + = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0; + const char *this_name = reg_names[this_regno]; + const char *fname; + + /* We use IP and R16 as a scratch registers. */ + gcc_assert (call_used_regs [GPR_IP]); + gcc_assert (call_used_regs [GPR_16]); + + /* Add DELTA. When possible use a plain add, otherwise load it into + a register first. */ + if (delta == 0) + ; /* Done. */ + else if (SIMM11 (delta)) + asm_fprintf (file, "\tadd\t%s,%s,%d\n", this_name, this_name, (int) delta); + else if (delta < 0 && delta >= -0xffff) + { + asm_fprintf (file, "\tmov\tip,%d\n", (int) -delta); + asm_fprintf (file, "\tsub\t%s,%s,ip\n", this_name, this_name); + } + else + { + asm_fprintf (file, "\tmov\tip,%%low(%ld)\n", (long) delta); + if (delta & ~0xffff) + asm_fprintf (file, "\tmovt\tip,%%high(%ld)\n", (long) delta); + asm_fprintf (file, "\tadd\t%s,%s,ip\n", this_name, this_name); + } + + /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. 
*/ + if (vcall_offset != 0) + { + /* ldr ip,[this] --> temp = *this + ldr ip,[ip,vcall_offset] > temp = *(*this + vcall_offset) + add this,this,ip --> this+ = *(*this + vcall_offset) */ + asm_fprintf (file, "\tldr\tip, [%s]\n", this_name); + if (vcall_offset < -0x7ff * 4 || vcall_offset > 0x7ff * 4 + || (vcall_offset & 3) != 0) + { + asm_fprintf (file, "\tmov\tr16, %%low(%ld)\n", (long) vcall_offset); + asm_fprintf (file, "\tmovt\tr16, %%high(%ld)\n", (long) vcall_offset); + asm_fprintf (file, "\tldr\tip, [ip,r16]\n"); + } + else + asm_fprintf (file, "\tldr\tip, [ip,%d]\n", (int) vcall_offset / 4); + asm_fprintf (file, "\tadd\t%s, %s, ip\n", this_name, this_name); + } + + fname = XSTR (XEXP (DECL_RTL (function), 0), 0); + if (epiphany_is_long_call_p (XEXP (DECL_RTL (function), 0))) + { + fputs ("\tmov\tip,%low(", file); + assemble_name (file, fname); + fputs (")\n\tmovt\tip,%high(", file); + assemble_name (file, fname); + fputs (")\n\tjr ip\n", file); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, fname); + fputc ('\n', file); + } +} + +void +epiphany_start_function (FILE *file, const char *name, tree decl) +{ + /* If the function doesn't fit into the on-chip memory, it will have a + section attribute - or lack of it - that denotes it goes somewhere else. + But the architecture spec says that an interrupt vector still has to + point to on-chip memory. So we must place a jump there to get to the + actual function implementation. The forwarder_section attribute + specifies the section where this jump goes. + This mechanism can also be useful to have a shortcall destination for + a function that is actually placed much farther away. */ + tree attrs, int_attr, int_names, int_name, forwarder_attr; + + attrs = DECL_ATTRIBUTES (decl); + int_attr = lookup_attribute ("interrupt", attrs); + if (int_attr) + for (int_names = TREE_VALUE (int_attr); int_names; + int_names = TREE_CHAIN (int_names)) + { + char buf[99]; + + int_name = TREE_VALUE (int_names); + sprintf (buf, "ivt_entry_%.80s", TREE_STRING_POINTER (int_name)); + switch_to_section (get_section (buf, SECTION_CODE, decl)); + fputs ("\tb\t", file); + assemble_name (file, name); + fputc ('\n', file); + } + forwarder_attr = lookup_attribute ("forwarder_section", attrs); + if (forwarder_attr) + { + const char *prefix = "__forwarder_dst_"; + char *dst_name = (char *) alloca (strlen (prefix) + strlen (name) + 1); + + strcpy (dst_name, prefix); + strcat (dst_name, name); + forwarder_attr = TREE_VALUE (TREE_VALUE (forwarder_attr)); + switch_to_section (get_section (TREE_STRING_POINTER (forwarder_attr), + SECTION_CODE, decl)); + ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); + if (epiphany_is_long_call_p (XEXP (DECL_RTL (decl), 0))) + { + int tmp = GPR_0; + + if (int_attr) + fputs ("\tstrd r0,[sp,-1]\n", file); + else + tmp = GPR_16; + gcc_assert (call_used_regs[tmp]); + fprintf (file, "\tmov r%d,%%low(", tmp); + assemble_name (file, dst_name); + fprintf (file, ")\n" + "\tmovt r%d,%%high(", tmp); + assemble_name (file, dst_name); + fprintf (file, ")\n" + "\tjr r%d\n", tmp); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, dst_name); + fputc ('\n', file); + } + name = dst_name; + } + switch_to_section (function_section (decl)); + ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); +} + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.h b/gcc-4.9/gcc/config/epiphany/epiphany.h new file mode 100644 index 000000000..cffb00c03 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.h 
@@ -0,0 +1,945 @@ +/* Definitions of target machine for GNU compiler, Argonaut EPIPHANY cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_EPIPHANY_H +#define GCC_EPIPHANY_H + +#undef LINK_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__epiphany__"); \ + builtin_define ("__little_endian__"); \ + builtin_define_with_int_value ("__EPIPHANY_STACK_OFFSET__", \ + epiphany_stack_offset); \ + builtin_assert ("cpu=epiphany"); \ + builtin_assert ("machine=epiphany"); \ + } while (0) + +/* Pick up the libgloss library. One day we may do this by linker script, but + for now its static. + libgloss might use errno/__errno, which might not have been needed when we + saw libc the first time, so link with libc a second time. */ +#undef LIB_SPEC +#define LIB_SPEC "%{!shared:%{g*:-lg} %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}} -lepiphany %{!shared:%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}}" + +#define LINK_SPEC "%{v}" + +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti.o%s " \ + "%{mfp-mode=int:crtint.o%s} %{mfp-mode=truncate:crtrunc.o%s} " \ + "%{m1reg-r43:crtm1reg-r43.o%s} %{m1reg-r63:crtm1reg-r63.o%s} " \ + "crtbegin.o%s" + +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define EPIPHANY_LIBRARY_EXTRA_SPEC \ + "-ffixed-r40 -ffixed-r41 -ffixed-r42 -ffixed-r43" + +/* In the "spec:" rule,, t-epiphany changes this to epiphany_library_stub_spec + and epiphany_library_extra_spec, respectively. */ +#define EXTRA_SPECS \ + { "epiphany_library_extra_spec", "" }, \ + { "epiphany_library_build_spec", EPIPHANY_LIBRARY_EXTRA_SPEC }, \ + +#define DRIVER_SELF_SPECS " %(epiphany_library_extra_spec) " + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + mov r0,%low(" USER_LABEL_PREFIX #FUNC")\n\ + movt r0,%high(" USER_LABEL_PREFIX #FUNC")\n\ + jalr r0\n\ + .text"); + +#if 0 /* We would like to use Posix for profiling, but the simulator + interface still lacks mkdir. */ +#define TARGET_POSIX_IO +#endif + +/* Target machine storage layout. */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. 
In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ +/* It is far faster to zero extend chars than to sign extend them */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 1; \ + (MODE) = SImode; \ + } + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 64 + +/* ALIGN FRAMES on word boundaries */ +#define EPIPHANY_STACK_ALIGN(LOC) (((LOC)+7) & ~7) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +/* This is bigger than currently necessary for the EPIPHANY. If 8 byte floats are + ever added it's not clear whether they'll need such alignment or not. For + now we assume they will. We can always relax it if necessary but the + reverse isn't true. */ +#define BIGGEST_ALIGNMENT 64 + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 64 + +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + +/* Make strings dword-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Make arrays of chars dword-aligned for the same reasons. + Also, align arrays of SImode items. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT \ + ? FASTEST_ALIGNMENT \ + : (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == SImode \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT \ + : (ALIGN)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +/* On the EPIPHANY the lower address bits are masked to 0 as necessary. The chip + won't croak when given an unaligned address, but the insn will still fail + to produce the correct result. */ +#define STRICT_ALIGNMENT 1 + +/* layout_type overrides our ADJUST_ALIGNMENT settings from epiphany-modes.def + for vector modes, so we have to override it back. */ +#define ROUND_TYPE_ALIGN(TYPE, MANGLED_ALIGN, SPECIFIED_ALIGN) \ + (TREE_CODE (TYPE) == VECTOR_TYPE && !TYPE_USER_ALIGN (TYPE) \ + && SPECIFIED_ALIGN <= GET_MODE_ALIGNMENT (TYPE_MODE (TYPE)) \ + ? GET_MODE_ALIGNMENT (TYPE_MODE (TYPE)) \ + : ((TREE_CODE (TYPE) == RECORD_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == QUAL_UNION_TYPE) \ + && !TYPE_PACKED (TYPE)) \ + ? epiphany_special_round_type_align ((TYPE), (MANGLED_ALIGN), \ + (SPECIFIED_ALIGN)) \ + : MAX ((MANGLED_ALIGN), (SPECIFIED_ALIGN))) + +#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ + epiphany_adjust_field_align((FIELD), (COMPUTED)) + +/* Layout of source language data types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ + +#define FIRST_PSEUDO_REGISTER 78 + + +/* General purpose registers. */ +#define GPR_FIRST 0 /* First gpr */ + +#define PIC_REGNO (GPR_FIRST + 28) /* PIC register. */ +#define GPR_LAST (GPR_FIRST + 63) /* Last gpr */ +#define CORE_CONTROL_FIRST CONFIG_REGNUM +#define CORE_CONTROL_LAST IRET_REGNUM + +#define GPR_P(R) IN_RANGE (R, GPR_FIRST, GPR_LAST) +#define GPR_OR_AP_P(R) (GPR_P (R) || (R) == ARG_POINTER_REGNUM) + +#define GPR_OR_PSEUDO_P(R) (GPR_P (R) || (R) >= FIRST_PSEUDO_REGISTER) +#define GPR_AP_OR_PSEUDO_P(R) (GPR_OR_AP_P (R) || (R) >= FIRST_PSEUDO_REGISTER) + +#define FIXED_REGISTERS \ +{ /* Integer Registers */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 000-007, gr0 - gr7 */ \ + 0, 0, 0, 0, 0, 1, 0, 0, /* 008-015, gr8 - gr15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 016-023, gr16 - gr23 */ \ + 0, 0, 0, 0, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 032-039, gr32 - gr39 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 040-047, gr40 - gr47 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 048-055, gr48 - gr55 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 056-063, gr56 - gr63 */ \ + /* Other registers */ \ + 1, /* 64 AP - fake arg ptr */ \ + 1, /* soft frame pointer */ \ + 1, /* CC_REGNUM - integer conditions */\ + 1, /* CCFP_REGNUM - fp conditions */\ + 1, 1, 1, 1, 1, 1, /* Core Control Registers. */ \ + 1, 1, 1, /* FP_{NEAREST,...}_REGNUM */\ + 1, /* UNKNOWN_REGNUM - placeholder. */\ +} + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. + + If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically + saves it on function entry and restores it on function exit, if the register + is used within the function. */ + +#define CALL_USED_REGISTERS \ +{ /* Integer Registers */ \ + 1, 1, 1, 1, 0, 0, 0, 0, /* 000-007, gr0 - gr7 */ \ + 0, 0, 0, 0, 1, 1, 1, 0, /* 008-015, gr8 - gr15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 016-023, gr16 - gr23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 032-039, gr32 - gr38 */ \ + 0, 0, 0, 0, 1, 1, 1, 1, /* 040-047, gr40 - gr47 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 048-055, gr48 - gr55 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 056-063, gr56 - gr63 */ \ + 1, /* 64 AP - fake arg ptr */ \ + 1, /* soft frame pointer */ \ + 1, /* 66 CC_REGNUM */ \ + 1, /* 67 CCFP_REGNUM */ \ + 1, 1, 1, 1, 1, 1, /* Core Control Registers. */ \ + 1, 1, 1, /* FP_{NEAREST,...}_REGNUM */\ + 1, /* UNKNOWN_REGNUM - placeholder. */\ +} + +#define REG_ALLOC_ORDER \ + { \ + 0, 1, 2, 3, /* Caller-saved 'small' registers. */ \ + 12, /* Caller-saved unpaired register. 
*/ \ + /* Caller-saved registers. */ \ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, \ + 44, 45, 46, 47, \ + 48, 49, 50, 51, 52, 53, 54, 55, \ + 56, 57, 58, 59, 60, 61, 62, 63, \ + 4, 5, 6, 7, /* Callee-saved 'small' registers. */ \ + 15, /* Callee-saved unpaired register. */ \ + 8, 9, 10, 11, /* Callee-saved registers. */ \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, \ + 14, 13, /* Link register, stack pointer. */ \ + /* Can't allocate, but must name these... */ \ + 28, 29, 30, 31, \ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77 \ + } + +#define HARD_REGNO_RENAME_OK(SRC, DST) epiphany_regno_rename_ok (SRC, DST) + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ +extern const unsigned int epiphany_hard_regno_mode_ok[]; +extern unsigned int epiphany_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE)) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class { + NO_REGS, + LR_REGS, + SHORT_INSN_REGS, + SIBCALL_REGS, + GENERAL_REGS, + CORE_CONTROL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "LR_REGS", \ + "SHORT_INSN_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "CORE_CONTROL_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES.
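The initializer that follows packs each class into three 32-bit words, one bit per hard register. A small sketch of how such a mask is queried (illustrative only, not code from this port):

   #include <stdio.h>

   // regno 0..31 is in word 0, 32..63 in word 1, 64..77 in word 2
   static int class_contains (const unsigned mask[3], unsigned regno)
   {
     return (mask[regno / 32] >> (regno % 32)) & 1;
   }

   int main (void)
   {
     const unsigned lr_regs[3] = { 0x00004000, 0x0, 0x0 };  // LR_REGS below
     printf ("%d\n", class_contains (lr_regs, 14));  // lr is reg 14: prints 1
     printf ("%d\n", class_contains (lr_regs, 13));  // sp is reg 13: prints 0
     return 0;
   }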
*/ + +#define REG_CLASS_CONTENTS \ +{ /* r0-r31 r32-r63 ap/sfp/cc1/cc2/iret/status */ \ + { 0x00000000,0x00000000,0x0}, /* NO_REGS */ \ + { 0x00004000,0x00000000,0x0}, /* LR_REGS */ \ + { 0x000000ff,0x00000000,0x0}, /* SHORT_INSN_REGS */ \ + { 0xffff100f,0xffffff00,0x0}, /* SIBCALL_REGS */ \ + { 0xffffffff,0xffffffff,0x0003}, /* GENERAL_REGS */ \ + { 0x00000000,0x00000000,0x03f0}, /* CORE_CONTROL_REGS */ \ + { 0xffffffff,0xffffffff,0x3fff}, /* ALL_REGS */ \ +} + + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +extern enum reg_class epiphany_regno_reg_class[FIRST_PSEUDO_REGISTER]; +#define REGNO_REG_CLASS(REGNO) \ +(epiphany_regno_reg_class[REGNO]) + +/* The class value for index registers, and the one for base regs. */ +#define BASE_REG_CLASS GENERAL_REGS +#define INDEX_REG_CLASS GENERAL_REGS + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER || (unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER || (unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER) + + + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ +#define PREFERRED_RELOAD_CLASS(X,CLASS) \ +(CLASS) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The letters I, J, K, L, M, N, O, P in a register constraint string + can be used to stand for particular ranges of immediate operands. + This macro defines what the ranges are. + C is the letter, and VALUE is a constant value. + Return 1 if VALUE is in the range specified by C. */ + +/* 'I' is used for 16 bit unsigned. + 'Cal' is used for long immediates (32 bits) + 'K' is used for any constant up to 5 bits. + 'L' is used for any 11 bit signed. +*/ + +#define IMM16(X) (IN_RANGE ((X), 0, 0xFFFF)) +#define SIMM16(X) (IN_RANGE ((X), -65536, 65535)) +#define SIMM11(X) (IN_RANGE ((X), -1024, 1023)) +#define IMM5(X) (IN_RANGE ((X), 0, 0x1F)) + +typedef struct GTY (()) machine_function +{ + unsigned args_parsed : 1; + unsigned pretend_args_odd : 1; + unsigned lr_clobbered : 1; + unsigned control_use_inserted : 1; + unsigned lr_slot_known : 1; + unsigned sw_entities_processed : 6; + long lr_slot_offset; + rtx and_mask; + rtx or_mask; + unsigned unknown_mode_uses; + unsigned unknown_mode_sets; +} machine_function_t; + +#define MACHINE_FUNCTION(fun) (fun)->machine + +#define INIT_EXPANDERS epiphany_init_expanders () + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. 
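Spelling out the immediate-range macros defined above (IMM16, SIMM11, IMM5) as plain predicates may help when reading the constraint letters they back; this is only a sketch, not the port's code:

   #include <stdio.h>

   static int imm16_p  (long x) { return x >= 0 && x <= 0xFFFF; }    // 'I', 16-bit unsigned
   static int simm11_p (long x) { return x >= -1024 && x <= 1023; }  // 'L', 11-bit signed
   static int imm5_p   (long x) { return x >= 0 && x <= 0x1F; }      // 'K', 5-bit unsigned

   int main (void)
   {
     printf ("%d %d %d\n", imm16_p (65535), simm11_p (-1024), imm5_p (32));  // 1 1 0
     return 0;
   }

So, for example, 1024 is not a valid 'L' operand, while -1024 still is.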
*/ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET epiphany_stack_offset + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET epiphany_stack_offset + +/* Offset of first parameter from the argument pointer register value. */ +/* 4 bytes for each of previous fp, return address, and previous gp. + 4 byte reserved area for future considerations. */ +#define FIRST_PARM_OFFSET(FNDECL) \ + (epiphany_stack_offset \ + + (MACHINE_FUNCTION (DECL_STRUCT_FUNCTION (FNDECL))->pretend_args_odd \ + ? 4 : 0)) + +#define INCOMING_FRAME_SP_OFFSET epiphany_stack_offset + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM GPR_SP + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM GPR_FP + +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM GPR_IP + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define ELIMINABLE_REGS \ +{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ +} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = epiphany_initial_elimination_offset ((FROM), (TO))) + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `current_function_outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_EPIPHANY_PARM_REGS 4 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_EPIPHANY_PARM_REGS) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. 
This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). + This macro is only used in this file. */ +/* We must use partial argument passing because of the chosen mode + of varargs handling. */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ + (ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) < MAX_EPIPHANY_PARM_REGS) + +/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 1 + +#define EPILOGUE_USES(REGNO) epiphany_epilogue_uses (REGNO) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* Given an rtx for the frame pointer, + return an rtx for the address of the frame. */ +#define FRAME_ADDR_RTX(frame) \ + ((frame) == hard_frame_pointer_rtx ? arg_pointer_rtx : NULL) + +#define EPIPHANY_RETURN_REGNO \ + ((current_function_decl != NULL \ + && epiphany_is_interrupt_p (current_function_decl)) \ + ? IRET_REGNUM : GPR_LR) +/* This is not only for dwarf unwind info, but also for the benefit of + df-scan.c to tell it that LR is live at the function start. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, EPIPHANY_RETURN_REGNO) + +/* However, we haven't implemented the rest needed for dwarf2 unwind info. */ +#define DWARF2_UNWIND_INFO 0 + +#define RETURN_ADDR_RTX(count, frame) \ + (count ? NULL_RTX \ + : gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), UNSPEC_RETURN_ADDR)) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (EPIPHANY_RETURN_REGNO) + +/* Trampolines. + An epiphany trampoline looks like this: + mov r16,%low(fnaddr) + movt r16,%high(fnaddr) + mov ip,%low(cxt) + movt ip,%high(cxt) + jr r16 */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 20 + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* We have post_modify (load/store with update). */ +#define HAVE_POST_INCREMENT TARGET_POST_INC +#define HAVE_POST_DECREMENT TARGET_POST_INC +#define HAVE_POST_MODIFY_DISP TARGET_POST_MODIFY +#define HAVE_POST_MODIFY_REG TARGET_POST_MODIFY + +/* Currently, the only users of the USE_*CREMENT macros are + move_by_pieces / store_by_pieces_1 . We don't want them to use + POST_MODIFY modes, because we got ample addressing range for the + reg+offset addressing mode; besides, there are short index+offset loads, + but the only short post-modify load uses POST_MODIFY_REG. + Moreover, using auto-increment in move_by_pieces from structure copying + in the prologue causes confused debug output. + If another pass starts using these macros where the use of these + addressing modes would make more sense, we can try checking the + current pass. */ +#define USE_LOAD_POST_INCREMENT(MODE) 0 +#define USE_LOAD_POST_DECREMENT(MODE) 0 +#define USE_STORE_POST_INCREMENT(MODE) 0 +#define USE_STORE_POST_DECREMENT(MODE) 0 + +/* Recognize any constant value that is a valid address. 
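The offset test defined a few lines below (RTX_OK_FOR_OFFSET_1) accepts a displacement only if it is a multiple of the access size and lies within +/- 2047 times that size; a plain-C sketch of the same check (illustrative only):

   #include <stdio.h>

   static int offset_ok_p (long off, long size)   // size is a power of two
   {
     return (off & (size - 1)) == 0
            && off >= -2047 * size
            && off <= 2047 * size;
   }

   int main (void)
   {
     printf ("%d\n", offset_ok_p (8188, 4));  // 2047 * 4: prints 1
     printf ("%d\n", offset_ok_p (8192, 4));  // out of range: prints 0
     printf ("%d\n", offset_ok_p (2, 4));     // misaligned: prints 0
     return 0;
   }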
*/ +#define CONSTANT_ADDRESS_P(X) \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST) + +#define RTX_OK_FOR_OFFSET_P(MODE, X) \ + RTX_OK_FOR_OFFSET_1 (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + && epiphany_vect_align == 4 ? SImode : (MODE), X) +#define RTX_OK_FOR_OFFSET_1(MODE, X) \ + (GET_CODE (X) == CONST_INT \ + && !(INTVAL (X) & (GET_MODE_SIZE (MODE) - 1)) \ + && INTVAL (X) >= -2047 * (int) GET_MODE_SIZE (MODE) \ + && INTVAL (X) <= 2047 * (int) GET_MODE_SIZE (MODE)) + +/* Frame offsets cannot be evaluated till the frame pointer is eliminated. */ +#define RTX_FRAME_OFFSET_P(X) \ + ((X) == frame_pointer_rtx \ + || (GET_CODE (X) == PLUS && XEXP ((X), 0) == frame_pointer_rtx \ + && CONST_INT_P (XEXP ((X), 1)))) + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ +#define SELECT_CC_MODE(OP, X, Y) \ + epiphany_select_cc_mode (OP, X, Y) + +/* Return nonzero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ + +#define REVERSE_CONDITION(CODE, MODE) \ + ((MODE) == CC_FPmode || (MODE) == CC_FP_EQmode || (MODE) == CC_FP_GTEmode \ + || (MODE) == CC_FP_ORDmode || (MODE) == CC_FP_UNEQmode \ + ? reverse_condition_maybe_unordered (CODE) \ + : (MODE) == CCmode ? reverse_condition (CODE) \ + : UNKNOWN) + +/* We can reverse all CCmodes with REVERSE_CONDITION. */ +#define REVERSIBLE_CC_MODE(MODE) \ + ((MODE) == CCmode || (MODE) == CC_FPmode || (MODE) == CC_FP_EQmode \ + || (MODE) == CC_FP_GTEmode || (MODE) == CC_FP_ORDmode \ + || (MODE) == CC_FP_UNEQmode) + +/* Costs. */ + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) \ + (speed_p ? epiphany_branch_cost : 1) + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the EPIPHANY, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarf2out.c. */ + +#define TEXT_SECTION_ASM_OP "\t.section .text" +#define DATA_SECTION_ASM_OP "\t.section .data" + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section .rodata" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic) + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 
PIC_REGNO : INVALID_REGNUM) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +#define ASM_COMMENT_START ";" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "fp", "ip", "sp", "lr", "r15", \ + "r16", "r17","r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", \ + "ap", "sfp", "cc1", "cc2", \ + "config", "status", "lc", "ls", "le", "iret", \ + "fp_near", "fp_trunc", "fp_anyfp", "unknown" \ +} + +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + epiphany_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +#define LOCAL_LABEL_PREFIX "." + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char epiphany_punct_chars[256]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ + epiphany_punct_chars[(unsigned char) (CHAR)] + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + if (CASE_VECTOR_MODE == Pmode) \ + asm_fprintf ((FILE), "\t.word %LL%d\n", (VALUE)); \ + else \ + asm_fprintf ((FILE), "\t.short %LL%d\n", (VALUE)); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + if (CASE_VECTOR_MODE == Pmode) \ + asm_fprintf ((FILE), "\t.word"); \ + else \ + asm_fprintf ((FILE), "\t.short"); \ + asm_fprintf ((FILE), " %LL%d-%LL%d\n", (VALUE), (REL)); \ +} while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ +do { if ((LOG) != 0) fprintf (FILE, "\t.balign %d\n", 1 << (LOG)); } while (0) + +/* Inside the text section, align with nops rather than zeros. */ +#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE, LOG) \ +do \ +{ \ + if ((LOG) != 0) fprintf (FILE, "\t.balignw %d,0x01a2\n", 1 << (LOG)); \ +} while (0) + +/* This is how to declare the size of a function. 
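The two alignment macros just above simply print assembler directives; a minimal sketch of the text they produce (illustrative only), with LOG standing for the requested power of two:

   #include <stdio.h>

   int main (void)
   {
     int log = 3;                                  // align to 2**3 = 8 bytes
     printf ("\t.balign %d\n", 1 << log);          // ASM_OUTPUT_ALIGN
     printf ("\t.balignw %d,0x01a2\n", 1 << log);  // ASM_OUTPUT_ALIGN_WITH_NOP: pad .text with nops
     return 0;
   }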
*/ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + const char *__name = (FNAME); \ + tree attrs = DECL_ATTRIBUTES ((DECL)); \ + \ + if (!flag_inhibit_size_directive) \ + { \ + if (lookup_attribute ("forwarder_section", attrs)) \ + { \ + const char *prefix = "__forwarder_dst_"; \ + char *dst_name \ + = (char *) alloca (strlen (prefix) + strlen (__name) + 1); \ + \ + strcpy (dst_name, prefix); \ + strcat (dst_name, __name); \ + __name = dst_name; \ + } \ + ASM_OUTPUT_MEASURED_SIZE ((FILE), __name); \ + } \ + } \ + while (0) + +/* Debugging information. */ + +/* Generate DBX and DWARF debugging information. */ +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Turn off splitting of long stabs. */ +#define DBX_CONTIN_LENGTH 0 + +/* Miscellaneous. */ + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (TARGET_SMALL16 && optimize_size ? HImode : Pmode) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 8 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ + +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* EPIPHANY function types. */ +enum epiphany_function_type +{ + EPIPHANY_FUNCTION_UNKNOWN, EPIPHANY_FUNCTION_NORMAL, + EPIPHANY_FUNCTION_INTERRUPT +}; + +#define EPIPHANY_INTERRUPT_P(TYPE) ((TYPE) == EPIPHANY_FUNCTION_INTERRUPT) + +/* Compute the type of a function from its DECL. */ + +#define IMMEDIATE_PREFIX "#" + +#define OPTIMIZE_MODE_SWITCHING(ENTITY) \ + (epiphany_optimize_mode_switching (ENTITY)) + +/* We have two fake entities for lazy code motion of the mask constants, + one entity each for round-to-nearest / truncating + with a different idea what FP_MODE_ROUND_UNKNOWN will be, and + finally an entity that runs in a second mode switching pass to + resolve FP_MODE_ROUND_UNKNOWN. 
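A very rough sketch of the lazy-switching idea described here, reduced to plain C (conceptual only; the real pass works on the entities enumerated below):

   #include <stdio.h>

   enum fp_round { ROUND_UNKNOWN, ROUND_NEAREST, ROUND_TRUNC };
   static enum fp_round current_mode = ROUND_UNKNOWN;
   static int mode_sets = 0;

   static void need_round_mode (enum fp_round wanted)
   {
     if (wanted != current_mode)
       {
         mode_sets++;             // a real pass would emit the mode-set insn here
         current_mode = wanted;
       }
   }

   int main (void)
   {
     need_round_mode (ROUND_NEAREST);
     need_round_mode (ROUND_NEAREST);   // already in effect: nothing emitted
     need_round_mode (ROUND_TRUNC);
     printf ("%d\n", mode_sets);        // prints 2
     return 0;
   }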
*/ +#define NUM_MODES_FOR_MODE_SWITCHING \ + { 2, 2, 2, \ + FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE } + +#define MODE_NEEDED(ENTITY, INSN) epiphany_mode_needed((ENTITY), (INSN)) + +#define MODE_PRIORITY_TO_MODE(ENTITY, N) \ + (epiphany_mode_priority_to_mode ((ENTITY), (N))) + +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + emit_set_fp_mode ((ENTITY), (MODE), (HARD_REGS_LIVE)) + +#define MODE_ENTRY(ENTITY) (epiphany_mode_entry_exit ((ENTITY), false)) +#define MODE_EXIT(ENTITY) (epiphany_mode_entry_exit ((ENTITY), true)) +#define MODE_AFTER(ENTITY, LAST_MODE, INSN) \ + (epiphany_mode_after ((ENTITY), (LAST_MODE), (INSN))) + +#define TARGET_INSERT_MODE_SWITCH_USE epiphany_insert_mode_switch_use + +/* Mode switching entities. */ +enum +{ + EPIPHANY_MSW_ENTITY_AND, + EPIPHANY_MSW_ENTITY_OR, + EPIPHANY_MSW_ENTITY_CONFIG, /* 1 means config is known or saved. */ + EPIPHANY_MSW_ENTITY_NEAREST, + EPIPHANY_MSW_ENTITY_TRUNC, + EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN, + EPIPHANY_MSW_ENTITY_ROUND_KNOWN, + EPIPHANY_MSW_ENTITY_FPU_OMNIBUS, + EPIPHANY_MSW_ENTITY_NUM +}; + +extern int epiphany_normal_fp_rounding; +#ifndef IN_LIBGCC2 +extern rtl_opt_pass *make_pass_mode_switch_use (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_resolve_sw_modes (gcc::context *ctxt); +#endif + +/* This will need to be adjusted when FP_CONTRACT_ON is properly + implemented. */ +#define TARGET_FUSED_MADD (flag_fp_contract_mode == FP_CONTRACT_FAST) + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + epiphany_start_function ((FILE), (NAME), (DECL)) + +#endif /* !GCC_EPIPHANY_H */ diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.md b/gcc-4.9/gcc/config/epiphany/epiphany.md new file mode 100644 index 000000000..2844eeea7 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.md @@ -0,0 +1,2812 @@ +;; Machine description of the Adaptiva epiphany cpu for GNU C compiler +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +(define_constants + [(GPR_0 0) + (GPR_1 1) + (GPR_FP 11) + (GPR_IP 12) + (GPR_SP 13) + (GPR_LR 14) + (GPR_16 16) + (GPR_18 18) + (GPR_20 20) + (ARG_POINTER_REGNUM 64) + (FRAME_POINTER_REGNUM 65) + (CC_REGNUM 66) ;; 66 or 17 + (CCFP_REGNUM 67) ;; 67 or 18 + (CONFIG_REGNUM 68) + (STATUS_REGNUM 69) + (LC_REGNUM 70) + (LS_REGNUM 71) + (LE_REGNUM 72) + (IRET_REGNUM 73) + (FP_NEAREST_REGNUM 74) + (FP_TRUNCATE_REGNUM 75) + (FP_ANYFP_REGNUM 76) + (UNKNOWN_REGNUM 77) ; used for addsi3_r and friends + ; We represent the return address as an unspec rather than a reg. + ; If we used a reg, we could use register elimination, but eliminating + ; to GPR_LR would make the latter visible to dataflow, thus making it + ; harder to determine when it must be saved. 
+ (UNSPEC_RETURN_ADDR 0) + (UNSPEC_FP_MODE 1) + + (UNSPECV_GID 0) + (UNSPECV_GIE 1)]) + +;; Insn type. Used to default other attribute values. + +(define_attr "type" + "move,load,store,cmove,unary,compare,shift,mul,uncond_branch,branch,call,fp,fp_int,v2fp,misc,sfunc,fp_sfunc,flow" + (const_string "misc")) + +;; Length (in # bytes) + +(define_attr "length" "" (const_int 4)) + +;; The length here is the length of a single asm. + +(define_asm_attributes + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +;; pipeline model; so far we have only one. +(define_attr "pipe_model" "epiphany" (const_string "epiphany")) + +(define_attr "rounding" "trunc,nearest" + (cond [(ne (symbol_ref "TARGET_ROUND_NEAREST") (const_int 0)) + (const_string "nearest")] + (const_string "trunc"))) + +(define_attr "fp_mode" "round_unknown,round_nearest,round_trunc,int,caller,none" + (cond [(eq_attr "type" "fp,v2fp,fp_sfunc") + (symbol_ref "(enum attr_fp_mode) epiphany_normal_fp_rounding") + (eq_attr "type" "call") + (symbol_ref "(enum attr_fp_mode) epiphany_normal_fp_mode") + (eq_attr "type" "fp_int") + (const_string "int")] + (const_string "none"))) + +(include "epiphany-sched.md") + +(include "predicates.md") +(include "constraints.md") + +;; modes that are held in a single register, and hence, a word. +(define_mode_iterator WMODE [SI SF HI QI V2HI V4QI]) +(define_mode_iterator WMODE2 [SI SF HI QI V2HI V4QI]) + +;; modes that are held in a two single registers +(define_mode_iterator DWMODE [DI DF V2SI V2SF V4HI V8QI]) + +;; Double-word mode made up of two single-word mode values. +(define_mode_iterator DWV2MODE [V2SI V2SF]) +(define_mode_attr vmode_part [(V2SI "si") (V2SF "sf")]) +(define_mode_attr vmode_PART [(V2SI "SI") (V2SF "SF")]) +(define_mode_attr vmode_fp_type [(V2SI "fp_int") (V2SF "fp")]) +(define_mode_attr vmode_ccmode [(V2SI "CC") (V2SF "CC_FP")]) +(define_mode_attr vmode_cc [(V2SI "CC_REGNUM") (V2SF "CCFP_REGNUM")]) + +;; Move instructions. + +(define_expand "mov" + [(set (match_operand:WMODE 0 "general_operand" "") + (match_operand:WMODE 1 "general_operand" ""))] + "" +{ + if (mode == V4QImode || mode == V2HImode) + { + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (SImode, operands[1], mode, 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + if (mode == SImode + && (operands[1] == frame_pointer_rtx || operands[1] == arg_pointer_rtx)) + { + rtx reg = operands[0]; + + if (!REG_P (reg)) + reg = gen_reg_rtx (SImode); + emit_insn (gen_move_frame (reg, operands[1])); + operands[1] = reg; + if (operands[0] == reg) + DONE; + } +}) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=Rcs, r, r,r,m") + (match_operand:QI 1 "move_src_operand" "Rcs,rU16,Cal,m,r"))] +;; ??? Needed? 
+ "gpr_operand (operands[0], QImode) + || gpr_operand (operands[1], QImode)" + "@ + mov %0,%1 + mov %0,%1 + mov %0,%1 + ldrb %0,%1 + strb %1,%0" + [(set_attr "type" "move,move,move,load,store")]) + +(define_insn_and_split "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=r, r,r,m") + (match_operand:HI 1 "move_src_operand""rU16,Cal,m,r"))] + "gpr_operand (operands[0], HImode) + || gpr_operand (operands[1], HImode)" + "@ + mov %0,%1 + mov %0,%%low(%1); %1 + ldrh %0,%c1 + strh %1,%c0" + "reload_completed && CONSTANT_P (operands[1]) + && !satisfies_constraint_U16 (operands[1]) && TARGET_SPLIT_LOHI" + [(set (match_dup 2) (match_dup 3))] + "operands[2] = simplify_gen_subreg (SImode, operands[0], HImode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[1], HImode, 0);" + [(set_attr "type" "move,move,load,store")]) + +;; We use a special pattern for a move from the frame pointer to +;; show the flag clobber that is needed when this move is changed +;; to an add by register elimination. +;; ??? A pseudo register might be equivalent to a function invariant, +;; and thus placed by reload into reg_equiv_invariant; if the pseudo +;; does not get a hard register, we then end up with the function +;; invariant in its place, i.e. an unexpected clobber of the flags +;; register. +;; +;; N.B. operand 1 is an operand so that reload will perform elimination. +;; +;; The post-reload pattern recognition and splitting is done in frame_move_1. +(define_insn "move_frame" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "register_operand" "r")) + (clobber (reg:CC CC_REGNUM))] + "operands[1] == frame_pointer_rtx || operands[1] == arg_pointer_rtx" + "#") + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "gpr_operand" "+r") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (high:SI (match_operand:SI 1 "move_src_operand" "i"))))] + "" + "movt %0, %%high(%1)" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "movsi_lo_sum" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (lo_sum:SI (const_int 0) + (match_operand:SI 1 "move_src_operand" "i")))] + "" + "mov %0, %%low(%1)" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn_and_split "*movsi_insn" + [(set (match_operand:SI 0 "move_dest_operand" + "= r, r, r, r, r, r, m, r, Rct") + (match_operand:SI 1 "move_src_operand" + "rU16Rra,Cm1,Cl1,Cr1,Cal,mSra,rRra,Rct,r"))] + "gpr_operand (operands[0], SImode) + || gpr_operand (operands[1], SImode) + || satisfies_constraint_Sra (operands[1])" +{ + switch (which_alternative) + { + case 0: return "mov %0,%1"; + case 1: return "add %0,%-,(1+%1)"; + case 2: operands[1] = GEN_INT (exact_log2 (-INTVAL (operands[1]))); + return "lsl %0,%-,%1"; + case 3: operands[1] = GEN_INT (32 - exact_log2 (INTVAL (operands[1]) + 1)); + return "lsr %0,%-,%1"; + case 4: return "mov %0,%%low(%1)\;movt %0,%%high(%1) ; %1"; + case 5: return "ldr %0,%C1"; + case 6: return "str %1,%C0"; + case 7: return "movfs %0,%1"; + case 8: return "movts %0,%1"; + default: gcc_unreachable (); + } +} + "reload_completed && CONSTANT_P (operands[1]) + && !satisfies_constraint_U16 (operands[1]) + && !satisfies_constraint_Cm1 (operands[1]) + && !satisfies_constraint_Cl1 (operands[1]) + && !satisfies_constraint_Cr1 (operands[1]) + && TARGET_SPLIT_LOHI" + [(match_dup 2) (match_dup 3)] + "operands[2] = gen_movsi_lo_sum (operands[0], operands[1]); + operands[3] = gen_movsi_high (operands[0], operands[1]);" + [(set_attr "type" "move,misc,misc,misc,move,load,store,flow,flow") + (set_attr 
"length" "4,4,4,4,8,4,4,4,4")]) + +(define_split + [(set (match_operand:SI 0 "nonimmediate_operand") + (unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR))] + "reload_completed && !MACHINE_FUNCTION (cfun)->lr_clobbered" + [(set (match_dup 0) (reg:SI GPR_LR))]) + +(define_split + [(set (match_operand:SI 0 "gpr_operand") + (unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR))] + "reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + emit_insn (gen_reload_insi_ra (operands[0], operands[1])); + DONE; +}) + +(define_expand "reload_insi_ra" + [(set (match_operand:SI 0 "gpr_operand" "r") (match_operand:SI 1 "" "Sra"))] + "" +{ + rtx addr + = (frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx); + + if (!MACHINE_FUNCTION (cfun)->lr_slot_known) + { + start_sequence (); + epiphany_expand_prologue (); + if (!MACHINE_FUNCTION (cfun)->lr_slot_known) + epiphany_expand_epilogue (0); + end_sequence (); + gcc_assert (MACHINE_FUNCTION (cfun)->lr_slot_known); + } + addr = plus_constant (Pmode, addr, MACHINE_FUNCTION (cfun)->lr_slot_offset); + operands[1] = gen_frame_mem (SImode, addr); +}) + +;; If the frame pointer elimination offset is zero, we'll use this pattern. +;; Note that the splitter can accept any gpr in operands[1]; this is +;; necessary, (e.g. for compile/20021015-1.c -O0,) +;; because when register elimination cannot be done with the constant +;; as an immediate operand of the add instruction, reload will resort to +;; loading the constant into a reload register, using gen_add2_insn to add +;; the stack pointer, and then use the reload register as new source in +;; the move_frame pattern. +(define_insn_and_split "*move_frame_1" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "gpr_operand" "r")) + (clobber (reg:CC CC_REGNUM))] + "(reload_in_progress || reload_completed) + && (operands[1] == stack_pointer_rtx + || operands[1] == hard_frame_pointer_rtx)" + "#" + "reload_in_progress || reload_completed" + [(set (match_dup 0) (match_dup 1))]) + +(define_expand "mov" + [(set (match_operand:DWMODE 0 "general_operand" "") + (match_operand:DWMODE 1 "general_operand" ""))] + "" + " +{ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + if (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY) + { + rtx o0l, o0h, o1l, o1h; + + o0l = simplify_gen_subreg (SImode, operands[0], mode, 0); + o0h = simplify_gen_subreg (SImode, operands[0], mode, + UNITS_PER_WORD); + o1l = simplify_gen_subreg (SImode, operands[1], mode, 0); + o1h = simplify_gen_subreg (SImode, operands[1], mode, + UNITS_PER_WORD); + if (reg_overlap_mentioned_p (o0l, o1h)) + { + emit_move_insn (o0h, o1h); + emit_move_insn (o0l, o1l); + } + else + { + emit_move_insn (o0l, o1l); + emit_move_insn (o0h, o1h); + } + DONE; + } + /* lower_subreg has a tendency to muck up vectorized code. + To protect the wide memory accesses, we must use same-size + subregs. 
*/ + if (epiphany_vect_align != 4 /* == 8 */ + && !reload_in_progress + && (GET_CODE (operands[0]) == MEM || GET_CODE (operands[1]) == MEM) + && !misaligned_operand (operands[1], mode) + && (GET_CODE (operands[0]) != SUBREG + || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) + != GET_MODE_SIZE (mode) + && GET_CODE (operands[1]) != SUBREG))) + { + operands[0] + = simplify_gen_subreg (DImode, operands[0], mode, 0); + operands[1] + = simplify_gen_subreg (DImode, operands[1], mode, 0); + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + } + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); +}") + +(define_insn_and_split "*mov_insn" + [(set (match_operand:DWMODE 0 "move_dest_operand" "=r, r,r,m") + (match_operand:DWMODE 1 "move_double_src_operand" "r,CalE,m,r"))] + "(gpr_operand (operands[0], mode) + || gpr_operand (operands[1], mode))" + "@ + # + # + ldrd %0,%X1 + strd %1,%X0" + "reload_completed + && (((!MEM_P (operands[0]) || misaligned_operand (operands[0], mode)) + && (!MEM_P (operands[1]) + || misaligned_operand (operands[1], mode))) + || epiphany_vect_align == 4)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int word0 = 0, word1 = UNITS_PER_WORD; + + if (post_modify_operand (operands[0], mode) + || post_modify_operand (operands[1], mode)) + word0 = UNITS_PER_WORD, word1 = 0; + + operands[2] = simplify_gen_subreg (SImode, operands[0], mode, word0); + operands[3] = simplify_gen_subreg (SImode, operands[1], mode, word0); + operands[4] = simplify_gen_subreg (SImode, operands[0], mode, word1); + operands[5] = simplify_gen_subreg (SImode, operands[1], mode, word1); + if (post_modify_operand (operands[0], mode)) + operands[2] + = change_address (operands[2], VOIDmode, + plus_constant (Pmode, XEXP (XEXP (operands[0], 0), 0), + UNITS_PER_WORD)); + if (post_modify_operand (operands[1], mode)) + operands[3] + = change_address (operands[3], VOIDmode, + plus_constant (Pmode, XEXP (XEXP (operands[1], 0), 0), + UNITS_PER_WORD)); +} + [(set_attr "type" "move,move,load,store") + (set_attr "length" "8,16,4,4")]) + + +(define_insn_and_split "*movsf_insn" + [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,m") + (match_operand:SF 1 "move_src_operand" "r,E,m,r"))] + "gpr_operand (operands[0], SFmode) + || gpr_operand (operands[1], SFmode)" + "@ + mov %0,%1 + mov %0,%%low(%1)\;movt %0,%%high(%1) ; %1 + ldr %0,%C1 + str %1,%C0" + "reload_completed && CONSTANT_P (operands[1]) && TARGET_SPLIT_LOHI" + [(set (match_dup 2) (match_dup 3))] + "operands[2] = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[1], SFmode, 0);" + [(set_attr "type" "move,move,load,store") + (set_attr "length" "4,8,4,4")]) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "add_reg_operand" "") + (plus:SI (match_operand:SI 1 "add_reg_operand" "") + (match_operand:SI 2 "add_operand" "")))] + "" + " +{ + if (reload_in_progress || reload_completed) + emit_insn (gen_addsi3_r (operands[0], operands[1], operands[2])); + else if (TARGET_FP_IARITH && add_reg_operand (operands[2], SImode)) + emit_insn (gen_iadd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_addsi3_i (operands[0], operands[1], operands[2])); + DONE; +}") + +; The default case of epiphany_print_operand emits IMMEDIATE_PREFIX +; where appropriate; however, 'n' is processed by output_asm_insn +; which doesn't, so we have to explicitly emit the '# 
in the +; r/r/CnL output template alternative. +(define_insn "addsi3_i" + [(set (match_operand:SI 0 "add_reg_operand" "=r,r") + (plus:SI (match_operand:SI 1 "add_reg_operand" "%r,r") + (match_operand:SI 2 "add_operand" "rL,CnL"))) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + add %0,%1,%2 + sub %0,%1,#%n2" +[(set_attr "type" "misc")]) + +; We use a clobber of UNKNOWN_REGNUM here so that the peephole optimizers +; can identify the unresolved flags clobber problem, and also to +; avoid unwanted matches. +; +; At -O0 / -O1 we don't peephole all instances away. We could get better +; debug unwinding through the emitted code if we added a splitter. +(define_insn "addsi3_r" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rCar"))) + (clobber (reg:CC UNKNOWN_REGNUM))] + "reload_in_progress || reload_completed" +{ + int scratch = (0x17 + ^ (true_regnum (operands[0]) & 1) + ^ (true_regnum (operands[1]) & 2) + ^ (true_regnum (operands[2]) & 4)); + asm_fprintf (asm_out_file, "\tstr r%d,[sp,#0]\n", scratch); + asm_fprintf (asm_out_file, "\tmovfs r%d,status\n", scratch); + output_asm_insn ("add %0,%1,%2", operands); + asm_fprintf (asm_out_file, "\tmovts status,r%d\n", scratch); + asm_fprintf (asm_out_file, "\tldr r%d,[sp,#0]\n", scratch); + return ""; +} + [(set_attr "length" "20") + (set_attr "type" "misc")]) + +;; reload uses gen_addsi2 because it doesn't understand the need for +;; the clobber. +(define_peephole2 + [(set (match_operand:SI 0 "gpr_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (parallel [(set (match_dup 0) + (plus:SI (match_dup 0) + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "satisfies_constraint_L (operands[1]) + || ((operands[2] == stack_pointer_rtx + || (operands[2] == hard_frame_pointer_rtx && frame_pointer_needed)) + && !peep2_regno_dead_p (2, CC_REGNUM) + && satisfies_constraint_Car (operands[1]))" + [(parallel [(set (match_dup 0) + (plus:SI (match_dup 2) (match_dup 1))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + ;; FIXME: + ;; need this patch: http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html + ;; "peep2_rescan = true;" +) + +(define_peephole2 + [(match_parallel 5 "" + [(set (match_operand 3 "cc_operand" "") (match_operand 4 "" ""))]) + (parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "REGNO (operands[3]) == CC_REGNUM + && (gpr_operand (operands[2], SImode) + || satisfies_constraint_L (operands[2])) + && !reg_overlap_mentioned_p (operands[0], operands[5]) + && !reg_set_p (operands[1], operands[5]) + && !reg_set_p (operands[2], operands[5])" + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REGNUM))]) + (match_dup 5)] + "") + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "peep2_regno_dead_p (1, CC_REGNUM) + && (gpr_operand (operands[2], SImode) + || satisfies_constraint_L (operands[2]))" + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "") + +(define_peephole2 + 
[(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (reg:SI GPR_SP) + (match_operand:SI 1 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "(REG_P (operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[1])) + || RTX_OK_FOR_OFFSET_P (mode, operands[1])" + [(set (match_dup 0) (reg:SI GPR_SP)) + (set (mem:WMODE (post_modify (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))) + (reg:WMODE GPR_SP))] + "") + + + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (reg:SI GPR_FP) + (match_operand:SI 1 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))]) + (match_scratch:WMODE 2 "r")] + "frame_pointer_needed + && ((REG_P (operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[1])) + || RTX_OK_FOR_OFFSET_P (mode, operands[1]))" + [(set (match_dup 0) (reg:SI GPR_FP)) + (set (match_dup 2) + (mem:WMODE (post_modify (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))))] + "") + +(define_expand "subsi3" + [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "add_reg_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" + " +{ + gcc_assert (!reload_in_progress && !reload_completed); + + if (TARGET_FP_IARITH) + emit_insn (gen_isub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_subsi3_i (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "subsi3_i" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 1 "add_reg_operand" "r") + (match_operand:SI 2 "arith_operand" "rL"))) + (clobber (reg:CC CC_REGNUM))] + "" + "sub %0,%1,%2" + [(set_attr "type" "misc")]) + +; After mode-switching, floating point operations, fp_sfuncs and calls +; must exhibit the use of the control register, lest the setting of the +; control register could be deleted or moved. OTOH a use of a hard register +; greatly counfounds optimizers like the rtl loop optimizers or combine. +; Therefore, we put an extra pass immediately after the mode switching pass +; that inserts the USEs of the control registers, and sets a flag in struct +; machine_function that float_operation can henceforth only match with that +; USE. 
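The 32-bit constant alternative of *movsi_insn above is split into a mov of the low half followed by a movt of the high half; a small sketch of that split (illustrative only):

   #include <stdio.h>

   int main (void)
   {
     unsigned int value = 0x12345678;
     unsigned int lo = value & 0xffff;           // mov  rd,%low(value)
     unsigned int hi = (value >> 16) & 0xffff;   // movt rd,%high(value)
     unsigned int reassembled = (hi << 16) | lo;
     printf ("%#x\n", reassembled);              // prints 0x12345678
     return 0;
   }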
+ +;; Addition +(define_expand "addsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (plus:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*addsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (match_operand:SF 1 "gpr_operand" "%r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fadd %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Subtraction +(define_expand "subsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (minus:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*subsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fsub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_expand "subsf3_f" + [(parallel + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_dup 1) (match_dup 2)))])] + "!TARGET_SOFT_CMPSF") + +(define_insn "*subsf3_f_i" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_dup 1) (match_dup 2)))])] + "!TARGET_SOFT_CMPSF" + "fsub %0,%1,%2" + [(set_attr "type" "fp")]) + +; There is an fabs instruction, but it has longer latency. +(define_expand "abssf2" + [(set (match_operand:SF 0 "gpr_operand" "") + (abs:SF (match_operand:SF 1 "gpr_operand" "")))] + "" + " +{ + rtx op1 = copy_to_mode_reg (SImode, simplify_gen_subreg (SImode, operands[1], + SFmode, 0)); + rtx op0 = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + + emit_insn (gen_ashlsi3 (op1, op1, const1_rtx)); + emit_insn (gen_lshrsi3 (op0, op1, const1_rtx)); + DONE; +}") + +;; Multiplication +(define_expand "mulsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (mult:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*mulsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (mult:SF (match_operand:SF 1 "gpr_operand" "%r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmul %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Division +(define_expand "divsf3" + [(set (match_operand:SF 0 "gpr_operand" "") + (div:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" "")))] + "flag_reciprocal_math" +{ + rtx one = CONST1_RTX (SFmode); + rtx dst = operands[0]; + + if (rtx_equal_p (dst, operands[1])) + { + emit_move_insn (dst, one); + DONE; + } + else if (!register_operand (dst, SFmode) && can_create_pseudo_p ()) + dst = gen_reg_rtx (SFmode); + emit_insn (gen_recipsf2 (dst, one, operands[2], + sfunc_symbol (\"__fast_recipsf2\"))); + emit_insn (gen_mulsf3 (operands[0], operands[1], dst)); + DONE; +}) + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+;; It would be nicer to use a constraint for a GPR_0 - only register class, +;; but sched1 can still cause trouble then, and there is no guarantee of +;; better register allocations. +;; Neither is there when using the opposite strategy - putting explicit +;; hard register references into pre-reload rtl. +(define_expand "recipsf2" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "move_src_operand" ""))) + (use (match_operand:SI 3 "move_src_operand" "")) + (clobber (reg:SF 0)) + (clobber (reg:SI 1)) + (clobber (reg:SF GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "*recipsf2_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r,r") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "move_src_operand" "rU16m,rU16mCal")) + (clobber (reg:SF 0)) + (clobber (reg:SI 1)) + (clobber (reg:SF GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "flag_reciprocal_math" + "#" + "&& reload_completed" + [(set (reg:SI 1) (match_dup 3)) + (set (reg:SF 0) (match_dup 2)) + (parallel + [(set (reg:SF 0) + (div:SF (match_dup 1) + (reg:SF 0))) + (use (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SF 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*recipsf2_2" + [(match_parallel 1 "float_operation" + [(set (reg:SF 0) + (div:SF (match_operand:SF 0 "const_float_1_operand" "") + (reg:SF 0))) + (use (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "flag_reciprocal_math" + "jalr r1" + [(set_attr "type" "fp_sfunc")]) + + +;; Fused multiply-add +(define_expand "fmasf4" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (fma:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" "") + (match_operand:SF 3 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +; The multiply operands are commutative, but since they have the +; same constraints, there is no point in telling reload about this. +(define_insn "*fmadd" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (fma:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r") + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmadd %0,%1,%2" + [(set_attr "type" "fp")]) + +; Once vetorization consistently works for this port, should check +; if the fmadd / fmsub patterns still serve a purpose. 
With the +; introduction of fma / fnma handling by the SSA optimizers, +; at least scalars should be handled by these optimizers, would +; have to see how well they do on vectors from auto-vectorization. +; +; combiner pattern, also used by vector combiner pattern +(define_expand "maddsf" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")) + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD") + +(define_insn "*maddsf_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")) + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD" + "fmadd %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Fused multiply-sub +(define_expand "fnmasf4" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (fma:SF (neg:SF (match_operand:SF 1 "gpr_operand" "")) + (match_operand:SF 2 "gpr_operand" "") + (match_operand:SF 3 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +(define_insn "*fmsub" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (fma:SF (neg:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_operand:SF 2 "gpr_operand" "r") + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmsub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "*fmsub_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_operand:SF 3 "gpr_operand" "0") + (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD" + "fmsub %0,%1,%2" + [(set_attr "type" "fp")]) + +;; float / integer conversions + +(define_expand "floatsisf2" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (float:SF (match_operand:SI 1 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*floatsisf2_i" + [(match_parallel 2 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_operand:SI 1 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "float %0, %1" + [(set_attr "type" "fp")]) + +(define_expand "floatsisf2_cmp" + [(parallel + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (float:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_dup 2))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_dup 1)))])] + "" + "operands[2] = CONST0_RTX (SFmode);") + +(define_insn "*floatsisf2_cmp_i" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (float:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_operand:SF 2 "const0_operand" ""))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_dup 1)))])] + "" + "float %0, %1" + [(set_attr "type" "fp")]) + +(define_expand "floatunssisf2" + [(set (match_operand:SF 0 "gpr_operand" "") + (float:SF (match_operand:SI 1 "gpr_operand" "")))] + "epiphany_normal_fp_rounding == /*FP_MODE_ROUND_TRUNC*/ 2" +{ + rtx cst = force_reg (SImode, gen_int_mode (0xb0800000, SImode)); + rtx tmp = gen_reg_rtx (SImode); + rtx cmp = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + + if (reg_overlap_mentioned_p (operands[0], operands[1])) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + 
emit_insn (gen_floatsisf2 (operands[0], operands[1])); + emit_insn (gen_ashrsi3 (tmp, operands[1], GEN_INT (8))); + emit_insn (gen_sub_f (tmp, tmp, cst)); + emit_insn (gen_movsfcc (operands[0], cmp, + simplify_gen_subreg (SFmode, tmp, SImode, 0), + operands[0])); + DONE; +}) + +(define_expand "fix_truncsfsi2" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "") + (fix:SI (match_operand:SF 1 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*fix_truncsfsi2_i" + [(match_parallel 2 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (fix:SI (match_operand:SF 1 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fix %0, %1" + [(set_attr "type" "fp") + (set (attr "fp_mode") + (cond [(match_test "TARGET_MAY_ROUND_FOR_TRUNC") + (const_string "round_unknown")] + (const_string "round_trunc")))]) + +(define_expand "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "gpr_operand" "") + (unsigned_fix:SI (match_operand:SF 1 "gpr_operand" "")))] + "" +{ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + if (TARGET_SOFT_CMPSF || optimize_function_for_speed_p (cfun)) + { + rtx op1si; + /* By toggling what it to be bit31 before the shift, we get a chance to + use a short movt insn. */ + rtx bit31 = force_reg (SImode, GEN_INT (0x800000)); + rtx tmp = gen_reg_rtx (SImode); + rtx limit = force_reg (SImode, gen_int_mode (0x4f000000, SImode)); + rtx cmp + = gen_rtx_GE (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + + op1si = simplify_gen_subreg (SImode, operands[1], SFmode, 0); + emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); + emit_insn (gen_subsi3_i (tmp, op1si, bit31)); + emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (8))); + emit_insn (gen_cmpsi_cc_insn (op1si, limit)); + emit_insn (gen_movsicc (operands[0], cmp, tmp, operands[0])); + } + else + { + REAL_VALUE_TYPE offset; + rtx limit; + rtx tmp = gen_reg_rtx (SFmode); + rtx label = gen_label_rtx (); + rtx bit31; + rtx cc1 = gen_rtx_REG (CC_FPmode, CCFP_REGNUM); + rtx cmp = gen_rtx_LT (VOIDmode, cc1, CONST0_RTX (SFmode)); + + real_2expN (&offset, 31, SFmode); + limit = CONST_DOUBLE_FROM_REAL_VALUE (offset, SFmode); + limit = force_reg (SFmode, limit); + emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); + emit_insn (gen_subsf3_f (tmp, operands[1], limit)); + emit_jump_insn (gen_branch_insn (label, cmp, cc1)); + bit31 = force_reg (SImode, gen_int_mode (0x80000000, SImode)); + emit_insn (gen_fix_truncsfsi2 (operands[0], tmp)); + emit_insn (gen_xorsi3 (operands[0], operands[0], bit31)); + emit_label (label); + } + DONE; +}) + +(define_expand "iadd" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (plus:SI (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*iadd_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "iadd %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_expand "isub" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (minus:SI (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*isub_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 1 "gpr_operand" "r") + 
(match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "isub %0, %1, %2" + [(set_attr "type" "fp_int")]) + +; Try to figure out if we over-committed the FPU, and if so, move +; some insns back over to the integer pipe. + +; The peephole optimizer 'consumes' the insns that are explicitly +; mentioned. We do not want the preceding insn reconsidered, but +; we do want that for the following one, so that if we have a run +; of five fpu users, two of them get changed. Therefore, we +; use next_active_insn to look at the 'following' insn. That should +; exist, because peephole2 runs after reload, and there has to be +; a return after an fp_int insn. +; ??? However, we can not even ordinarily match the preceding insn; +; there is some bug in the generators such that then it leaves out +; the check for PARALLEL before the length check for the then-second +; main insn. Observed when compiling compatibility-atomic-c++0x.cc +; from libstdc++-v3. +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (match_operator:SI 4 "addsub_operator" + [(match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" "")])) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0)))" + [(parallel [(set (match_dup 0) (match_dup 4)) + (clobber (reg:CC CC_REGNUM))])] +) + +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI + (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "prev_active_insn (peep2_next_insn (0)) + && get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0))) + && find_reg_note (insn, REG_EQUAL, NULL_RTX) != NULL_RTX + && GET_CODE (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0)) == MULT + && CONST_INT_P (XEXP (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0), + 1))" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC CC_REGNUM))])] +{ + operands[4] + = XEXP (XEXP (find_reg_note (curr_insn, REG_EQUAL, NULL_RTX), 0), 1); +}) + +(define_expand "mulsi3" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*imul" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (mult:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "imul %0, %1, %2" + [(set_attr "type" "fp_int")]) + +; combiner pattern, also used by vector combiner pattern +(define_expand "maddsi" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")) + (match_operand:SI 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +(define_insn "*maddsi_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")) + (match_operand:SI 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + 
"imadd %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_insn "*imsub" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 3 "gpr_operand" "0") + (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "imsub %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_expand "divsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (div:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__divsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. +(define_insn_and_split "*divsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (div:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (div:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*divsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (div:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "udivsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (udiv:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__udivsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*udivsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (udiv:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (udiv:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*udivsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (udiv:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "modsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (mod:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__modsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*modsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (mod:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (mod:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*modsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (mod:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "umodsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (umod:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__umodsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*umodsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (umod:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (umod:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*umodsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (umod:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +; Disable interrupts. +; Any earlier values read from CONFIG_REGNUM are out of date, since interrupts +; might have changed settings that we do not want to mess with. +(define_insn "gid" + [(set (reg:SI CONFIG_REGNUM) + (unspec_volatile:SI [(const_int 0)] UNSPECV_GID))] + "" + "gid" + [(set_attr "type" "flow")]) + +; Enable interrupts. +; Present CONTROL_REGNUM here to make sure it is live before the +; actual uses in floating point insns / calls are inserted. +; FWIW, interrupts also do mind what is in the control register. +(define_insn "gie" + [(unspec_volatile [(reg:SI CONFIG_REGNUM)] UNSPECV_GIE)] + "" + "gie" + [(set_attr "type" "flow")]) + +; Floating point instructions require manipulating the control register. +; Manipulating the control register needs arithmetic. +; Arithmetic clobbers flags. +; The flags are in the status register, which also contains the alternate +; flag and the interrupt enable/disable bits. +; Saving/restoring status and mixing up the order with gid/gie could +; lead to disaster. +; Usually, saving/restoring the status is unnecessary, and will be optimized +; away. But when we really need it, we must make sure that we don't change +; anything but the flags. +; N.B.: We could make the constant easier to load by inverting it, but +; then we'd need to clobber the saved value - and that would make optimizing +; away unneeded saves/restores harder / less likely.
+(define_expand "movcc" + [(parallel [(set (match_operand:CC 0 "cc_move_operand" "") + (match_operand:CC 1 "cc_move_operand" "")) + (use (match_dup 2)) + (clobber (match_scratch:SI 3 "=X, &r"))])] + "" + "operands[2] = gen_int_mode (~0x10f0, SImode);") + +(define_insn "*movcc_i" + [(set (match_operand:CC 0 "cc_move_operand" "=r,Rcc") + (match_operand:CC 1 "cc_move_operand" "Rcc, r")) + (use (match_operand:SI 2 "nonmemory_operand" "X, r")) + (clobber (match_scratch:SI 3 "=X, &r"))] + "" + "@ + movfs %0,status + movfs %3,status\;eor %3,%3,%1\;and %3,%3,%2\;eor %3,%3,%1\;movts status,%3" + [(set_attr "type" "flow") + (set_attr "length" "20,4")]) + +(define_insn_and_split "save_config" + [(set (match_operand:SI 0 "gpr_operand" "=r") (reg:SI CONFIG_REGNUM)) + (use (reg:SI FP_NEAREST_REGNUM)) + (use (reg:SI FP_TRUNCATE_REGNUM)) + (use (reg:SI FP_ANYFP_REGNUM))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (reg:SI CONFIG_REGNUM))]) + +(define_insn_and_split "set_fp_mode" + [(set (reg:SI FP_NEAREST_REGNUM) + (match_operand:SI 0 "set_fp_mode_operand" "rCfm")) + (set (reg:SI FP_TRUNCATE_REGNUM) (match_dup 0)) + (set (reg:SI FP_ANYFP_REGNUM) + (match_operand:SI 1 "set_fp_mode_operand" "rCfm")) + (use (match_operand:SI 2 "gpr_operand" "r")) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "#" + "reload_completed || !rtx_equal_p (operands[0], operands[1])" + [(const_int 0)] +{ + if (!reload_completed) + emit_note (NOTE_INSN_DELETED); + else + epiphany_expand_set_fp_mode (operands); + DONE; +}) + + +;; Boolean instructions. +;; +;; We don't define the DImode versions as expand_binop does a good enough job. + +(define_insn "andsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (and:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "and %0,%1,%2") + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (ior:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "orr %0,%1,%2") + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (xor:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "eor %0,%1,%2") + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "gpr_operand" "") + (xor:SI (match_operand:SI 1 "gpr_operand" "") + (match_dup 2)))] + "" +{ + if (epiphany_m1reg >= 0) + emit_insn (gen_one_cmplsi2_i (operands[0], operands[1])); + else + emit_insn (gen_xorsi3 (operands[0], operands[1], + force_reg (SImode, GEN_INT (-1)))); + DONE; +}) + +; Note that folding this pattern into the xorsi3 pattern would make combine +; less effective. +(define_insn "one_cmplsi2_i" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (not:SI (match_operand:SI 1 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "epiphany_m1reg >= 0" + "eor %0,%1,%-") + +;; Shift instructions. +;; In principle we could support arbitrary symbolic values as shift constant +;; (truncating the value appropriately), but that would require a suitable +;; relocation and assembler & linker support. 
+(define_insn "ashrsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "asr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "ashrsi3_tst" + [(set (reg:CC CC_REGNUM) + (compare:CC + (ashiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "" + "asr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +;; Logical Shift Right +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (lshiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "lsr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "lshrsi3_tst" + [(set (reg:CC CC_REGNUM) + (compare:CC + (lshiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "" + "lsr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +;; Logical/Arithmetic Shift Left +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "lsl %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "*ashlsi_btst" + [(set (reg:CC_N_NE CC_REGNUM) + (compare:CC_N_NE + (zero_extract:SI (match_operand:SI 1 "gpr_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "K")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "" +{ + rtx xop[3]; + + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (31-INTVAL (operands[2])); + output_asm_insn ("lsl %0,%1,%2", xop); + return ""; +}) + +;; zero extensions +(define_insn_and_split "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m"))) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + # + ldrb %0,%1" + "reload_completed + ? true_regnum (operands[1]) >= 0 + : REG_P (operands[1]) && REGNO (operands[1]) < FIRST_PSEUDO_REGISTER" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (clobber (reg:CC CC_REGNUM))]) + (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24))) + (clobber (reg:CC CC_REGNUM))])] + "operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0);") + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0,m")))] + "" + "@ + movt %0, 0 + ldrh %0,%c1") + + +;; Compare instructions. 
+ +(define_insn "cmpsi_cc_insn" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "add_reg_operand" "r,r") + (match_operand:SI 1 "arith_operand" "r,L"))) + (clobber (match_scratch:SI 2 "=r,r"))] + "" + "sub %2,%0,%1" + [(set_attr "type" "compare")]) + +(define_insn "sub_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L"))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_f_add_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "arith_int_operand" "L"))) + (set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_dup 1) (match_operand:SI 3 "const_int_operand" "L")))] + "INTVAL (operands[2]) == -INTVAL (operands[3])" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_expand "abssi2" + [(set (match_dup 2) (const_int 0)) + (parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (match_dup 3) + (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_operand:SI 0 "gpr_operand" "=r") + (if_then_else:SI (gt:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 3) + (match_dup 1)))] + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);") + +(define_insn "*add_c" + [(set (reg:CC_C_LTU CC_REGNUM) + (compare:CC_C_LTU + (plus:SI (match_operand:SI 1 "gpr_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*add_c_rev" + [(set (reg:CC_C_LTU CC_REGNUM) + (compare:CC_C_LTU + (plus:SI (match_operand:SI 1 "gpr_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (plus:SI (match_dup 2) (match_dup 1)))] + "" + "add %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_c" + [(set (reg:CC_C_GTU CC_REGNUM) + (compare:CC_C_GTU + (minus:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_c_void" + [(set (reg:CC_C_GTU CC_REGNUM) + (compare:CC_C_GTU + (minus:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (clobber (match_scratch:SI 0 "=r,r"))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_code_iterator logical_op + [and ior xor]) + +(define_code_attr op_mnc + [(plus "add") (minus "sub") (and "and") (ior "orr") (xor "eor")]) + +(define_insn "*_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r") + (logical_op:SI (match_dup 1) (match_dup 2)))] + "" + " %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn_and_split "*mov_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r") (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r") (match_dup 1))] + "" + "#" + "reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 1) (match_dup 1)) (const_int 0))) + 
(set (match_operand:SI 0 "gpr_operand" "=r") + (and:SI (match_dup 1) (match_dup 1)))])] + "" + [(set_attr "type" "compare")]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 0) (match_dup 0)) (const_int 0))) + (set (match_operand:SI 3 "gpr_operand") + (and:SI (match_dup 0) (match_dup 0)))])] + "peep2_reg_dead_p (2, operands[0])" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 3) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 0) (match_dup 0)) (const_int 0))) + (set (match_operand:SI 3 "gpr_operand") + (and:SI (match_dup 0) (match_dup 0)))])] + "peep2_reg_dead_p (2, operands[3])" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (const_int 0))) + (clobber (match_operand:SI 3 "gpr_operand"))])] + "" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_expand "cstoresi4" + [(parallel + [(set (reg:CC CC_REGNUM) + (match_operand:SI 1 "comparison_operator")) + (match_operand:SI 2 "" "")]) + (set (match_dup 0) (match_operand:SI 3 "arith_operand" "")) + (set (match_operand:SI 0 "gpr_operand" "=r") + (if_then_else:SI (match_dup 4) (match_dup 5) (match_dup 0)))] + "" +{ + enum rtx_code o2_code = GET_CODE (operands[2]); + enum rtx_code cmp_code = GET_CODE (operands[1]); + + if ((o2_code == AND || o2_code == IOR || o2_code == XOR) + && operands[3] == const0_rtx) + { + operands[2] = copy_rtx(operands[2]); + XEXP (operands[2], 0) = force_reg (SImode, XEXP (operands[2], 0)); + XEXP (operands[2], 1) = force_reg (SImode, XEXP (operands[2], 1)); + } + else + operands[2] = force_reg (SImode, operands[2]); + operands[1] = gen_rtx_COMPARE (CCmode, operands[2], operands[3]); + if (cmp_code != NE) + { + operands[2] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); + operands[3] = const0_rtx; + } + else + { + if (operands[3] != const0_rtx) + operands[2] = gen_rtx_MINUS (SImode, operands[2], operands[3]); + operands[2] = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + operands[3] = operands[0]; + } + operands[4] = gen_rtx_fmt_ee (cmp_code, SImode, + gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + operands[5] = force_reg (SImode, GEN_INT (STORE_FLAG_VALUE)); +}) + + +; floating point comparisons + +(define_insn "*cmpsf_cc_insn" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 0 "gpr_operand" "r") + (match_operand:SF 1 "gpr_operand" "r"))) + (clobber (match_scratch:SF 2 "=r"))])] + "!TARGET_SOFT_CMPSF" + "fsub %2,%0,%1" + [(set_attr 
"type" "fp") + (set_attr "fp_mode" "round_unknown")]) + +;; ??? do we have to relax the operand0 predicate to immediate_operand +;; to allow the rtl loop optimizer to generate comparisons? OTOH +;; we want call_address_operand to enforce valid operands so that +;; combine won't do silly things, allowing instruction scheduling to do +;; a proper job. +(define_insn "*cmpsf_eq" + [(set (reg:CC_FP_EQ CC_REGNUM) (compare:CC_FP_EQ (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_gte" + [(set (reg:CC_FP_GTE CC_REGNUM) (compare:CC_FP_GTE (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_ord" + [(set (reg:CC_FP_ORD CC_REGNUM) (compare:CC_FP_ORD (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_16)) + (clobber (reg:SI GPR_LR))] + "" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_uneq" + [(set (reg:CC_FP_UNEQ CC_REGNUM) (compare:CC_FP_UNEQ (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_16)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +;; conditional moves + +(define_expand "movcc" + [(set (match_operand:WMODE 0 "gpr_operand" "") + (if_then_else:WMODE (match_operand 1 "comparison_operator" "") + (match_operand:WMODE 2 "gpr_operand" "") + (match_operand:WMODE 3 "gpr_operand" "")))] + "TARGET_CMOVE" +{ + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + enum machine_mode cmp_in_mode; + enum rtx_code code = GET_CODE (operands[1]); + + cmp_in_mode = GET_MODE (cmp_op0); + if (cmp_in_mode == VOIDmode) + cmp_in_mode = GET_MODE (cmp_op1); + if (cmp_in_mode == VOIDmode) + cmp_in_mode = SImode; + /* If the operands are a better match when reversed, swap them now. + This allows combine to see the proper comparison codes. */ + if (rtx_equal_p (operands[0], operands[2]) + && !rtx_equal_p (operands[0], operands[3])) + { + rtx tmp = operands[2]; operands[2] = operands[3]; operands[3] = tmp; + code = (FLOAT_MODE_P (GET_MODE (cmp_op0)) + ? reverse_condition_maybe_unordered (code) + : reverse_condition (code)); + } + + if (proper_comparison_operator (operands[1], VOIDmode)) + operands[1] = gen_rtx_fmt_ee (code, cmp_in_mode, cmp_op0, cmp_op1); + else + { + if (!currently_expanding_to_rtl) + { + /* ??? It would seem safest to FAIL here, but that would defeat + the purpose of having an if-conversion pass; its logic currently + assumes that the backend should be safe to insert condition code + setting instructions, as the same condition codes were presumably + set by the if-conversion input code. */ + } + /* What mode to give as first operand to gen_compare_reg here is + debatable. VOIDmode would be minimalist; telling gen_compare_reg + to use the mode of CC_REGNUM (or putting it on the comparison + operator afterwards) is also a logical choice. OTOH, by using + mode, we have mode combine opportunities with flag setting + operations - if we get some. 
*/ + operands[1] + = gen_compare_reg (mode, code, cmp_in_mode, cmp_op0, cmp_op1); + if (!operands[1]) + FAIL; + } +}) + +(define_insn "*movcc_insn" + [(set (match_operand:WMODE 0 "gpr_operand" "=r") + (if_then_else:WMODE (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_operand") (const_int 0)]) + (match_operand:WMODE 1 "gpr_operand" "r") + (match_operand:WMODE 2 "gpr_operand" "0")))] + "TARGET_CMOVE" + "mov%d3 %0,%1" + [(set_attr "type" "cmove")]) + +(define_peephole2 + [(parallel [(set (match_operand:WMODE 0 "gpr_operand" "") + (match_operand:WMODE 1 "" "")) + (clobber (match_operand 8 "cc_operand"))]) + (match_operand 2 "" "") + (set (match_operand:WMODE2 3 "gpr_operand" "") + (match_operand:WMODE2 9 "gpr_operand" "")) + (set (match_dup 3) + (if_then_else:WMODE2 (match_operator 5 "proper_comparison_operator" + [(match_operand 6 "cc_operand") + (match_operand 7 "const0_operand")]) + (match_operand:WMODE2 4 "nonmemory_operand" "") + (match_dup 3)))] + "REGNO (operands[0]) == REGNO (operands[9]) + && peep2_reg_dead_p (3, operands[0]) + && !reg_set_p (operands[0], operands[2]) + && !reg_set_p (operands[3], operands[2]) + && !reg_overlap_mentioned_p (operands[3], operands[2])" + [(parallel [(set (match_dup 10) (match_dup 1)) + (clobber (match_dup 8))]) + (match_dup 2) + (set (match_dup 3) + (if_then_else:WMODE2 (match_dup 5) (match_dup 4) (match_dup 3)))] +{ + operands[10] = simplify_gen_subreg (mode, operands[3], + mode, 0); + replace_rtx (operands[2], operands[9], operands[3]); + replace_rtx (operands[2], operands[0], operands[10]); + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[2])); +}) + +(define_peephole2 + [(parallel [(set (match_operand 6 "cc_operand") (match_operand 2 "" "")) + (set (match_operand:WMODE 0 "gpr_operand" "") + (match_operand:WMODE 1 "" ""))]) + (set (match_operand:WMODE2 3 "gpr_operand" "") + (match_operand:WMODE2 4 "gpr_operand")) + (set (match_dup 3) + (if_then_else:WMODE2 (match_operator 5 "proper_comparison_operator" + [(match_dup 6) + (match_operand:WMODE 7 "const0_operand")]) + (match_operand:WMODE2 8 "gpr_operand") + (match_dup 3)))] + "REGNO (operands[0]) == REGNO (operands[8]) + && REVERSIBLE_CC_MODE (GET_MODE (operands[6])) + && peep2_reg_dead_p (3, operands[6]) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[4], operands[3])" + [(parallel [(set (match_dup 6) (match_dup 2)) + (set (match_dup 9) (match_dup 1))]) + (set (match_dup 3) + (if_then_else:WMODE2 (match_dup 5) (match_dup 4) (match_dup 3)))] + " +{ + operands[5] + = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[5]), + GET_MODE (operands[6])), + GET_MODE (operands[5]), operands[6], operands[7]); + operands[9] = simplify_gen_subreg (mode, operands[3], + mode, 0); +}") + +;; These control RTL generation for conditional jump insns + +;; To signal to can_compare_p that the cbranchs?4 patterns work, +;; they must allow const0_rtx for both comparison operands +(define_expand "cbranchsi4" + [(set (reg CC_REGNUM) + (compare (match_operand:SI 1 "add_operand" "") + (match_operand:SI 2 "arith_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(reg CC_REGNUM) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx cmp = gen_compare_reg (VOIDmode, GET_CODE (operands[0]), SImode, + operands[1], operands[2]); + emit_jump_insn (gen_branch_insn (operands[3], cmp, XEXP (cmp, 0))); + DONE; +}) + +(define_expand "cbranchsf4" + [(set (reg CC_REGNUM) + (compare (match_operand:SF 1 
"arith_operand" "") + (match_operand:SF 2 "arith_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "comparison_operator" [(reg CC_REGNUM) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx cmp = gen_compare_reg (VOIDmode, GET_CODE (operands[0]), SFmode, + operands[1], operands[2]); + emit_jump_insn (gen_branch_insn (operands[3], cmp, XEXP (cmp, 0))); + DONE; +}) + +;; Now match both normal and inverted jump. + +(define_insn "branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(match_operand 2 "cc_operand") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b%d1 %l0" + [(set_attr "type" "branch")]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REGNUM) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b%D1 %l0" + [(set_attr "type" "branch")]) + +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b %l0" + [(set_attr "type" "uncond_branch")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "gpr_operand" "r"))] + "" + "jr %0" + [(set_attr "type" "uncond_branch")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "gpr_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored PC relative. + Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + op1, NULL_RTX, 0, OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:SI 0 "gpr_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jr %0;" + [(set_attr "type" "uncond_branch")]) + +(define_insn "*tablejump_hi_internal" + [(set (pc) (match_operand:HI 0 "gpr_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "optimize_size && TARGET_SMALL16" + "jr %0;" + [(set_attr "type" "uncond_branch")]) + + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI GPR_LR))])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[0]); + + if (!call_operand (operands[1], VOIDmode)) + operands[0] + = change_address (operands[0], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (SImode, GPR_LR))))); + emit_insn (target_uninterruptible ? 
gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*call_i" + [(match_parallel 2 "float_operation" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Csy,r")) + (match_operand 1 "" "")) + (clobber (reg:SI GPR_LR))])] + "" + "%f0" + [(set_attr "type" "call")]) + +(define_expand "sibcall" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (return)])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[0]); + + if (!call_operand (operands[1], VOIDmode)) + operands[0] + = change_address (operands[0], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]), + ret_rtx))); + emit_insn (target_uninterruptible ? gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*sibcall_i" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Csy,Rsc")) + (match_operand 1 "" "")) + (return)] + "" + "@ + b %0 + jr %0" + [(set_attr "type" "call")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "gpr_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI GPR_LR))])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[1]); + + if (!call_operand (operands[1], VOIDmode)) + operands[1] + = change_address (operands[1], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_SET + (VOIDmode, operands[0], + gen_rtx_CALL (VOIDmode, operands[1], operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (SImode, GPR_LR))))); + emit_insn (target_uninterruptible ? gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*call_value_i" + [(match_parallel 3 "float_operation" + [(set (match_operand 0 "gpr_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Csy,r")) + (match_operand 2 "" ""))) + (clobber (reg:SI GPR_LR))])] + "" + "%f1" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_expand "sibcall_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "gpr_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (return)])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[1]); + + if (!call_operand (operands[1], VOIDmode)) + operands[1] + = change_address (operands[1], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_SET + (VOIDmode, operands[0], + gen_rtx_CALL (VOIDmode, operands[1], operands[2])), + ret_rtx))); + emit_insn (target_uninterruptible ? 
gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*sibcall_value_i" + [(set (match_operand 0 "gpr_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Csy,Rsc")) + (match_operand 2 "" ""))) + (return)] + "" + "@ + b %1 + jr %1" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_expand "prologue" + [(pc)] + "" +{ + epiphany_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(pc)] + "" +{ + epiphany_expand_epilogue (0); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(pc)] + "" +{ + epiphany_expand_epilogue (1); + DONE; +}) + +; Since the demise of REG_N_SETS, it is no longer possible to find out +; in the prologue / epilogue expanders how many times lr is set. +; Using df_regs_ever_live_p to decide if lr needs saving means that +; any explicit use of lr will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES. +(define_insn "return_i" + [(return)] + "reload_completed" + "rts" + [(set_attr "type" "uncond_branch")]) + +(define_insn "return_internal_interrupt" + [(return) + (unspec_volatile [(const_int 0)] 1)] + "" + "rti" + [(set_attr "type" "uncond_branch")]) + +(define_insn "stack_adjust_add" + [(set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 0 "arith_operand" "rL"))) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:SI STATUS_REGNUM)) + (clobber (match_operand:BLK 1 "memory_operand" "=m"))] + "reload_completed" + "add sp,sp,%0") + +(define_insn "stack_adjust_mov" + [(set (reg:SI GPR_SP) (reg:SI GPR_FP)) + (clobber (match_operand:BLK 0 "memory_operand" "=m"))] + "reload_completed" + "mov sp,fp" + [(set_attr "type" "move")]) + +(define_insn "stack_adjust_str" + [(set (match_operand 0 "stacktop_operand" "=m") + (match_operand 1 "any_gpr_operand" "r")) + (set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 2 "nonmemory_operand" "rn"))) + (clobber (match_operand:BLK 3 "memory_operand" "=m"))] + "reload_completed" +{ + return (GET_MODE_SIZE (GET_MODE (operands[0])) <= 4 + ? \"str %1,%0,%C2\" : \"strd %1,%0,%X2\"); +} + [(set_attr "type" "store")]) + +(define_insn "stack_adjust_ldr" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "stacktop_operand" "m")) + (set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 2 "nonmemory_operand" "rn"))) + (clobber (match_operand:BLK 3 "memory_operand" "=m"))] + "reload_completed" + "ldr %0,%1,%C2" + [(set_attr "type" "load")]) + +;; Define some fake vector operations so that the vectorizer is happy to use +;; 64 bit loads/stores. 
+(define_expand "vec_unpacks_lo_v4hi" + [(match_operand:V2SI 0 "gpr_operand") + (match_operand:V4HI 1 "gpr_operand")] + "" +{ + rtx in = simplify_gen_subreg (SImode, operands[1], V4HImode, 0); + rtx outl = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + rtx outh + = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + + if (reg_overlap_mentioned_p (outl, in)) + in = copy_to_mode_reg (SImode, in); + emit_insn (gen_ashlsi3 (outl, in, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outl, outl, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outh, in, GEN_INT (16))); + DONE; +}) + +(define_expand "vec_unpacks_hi_v4hi" + [(match_operand:V2SI 0 "gpr_operand") + (match_operand:V4HI 1 "gpr_operand")] + "" +{ + rtx in = simplify_gen_subreg (SImode, operands[1], V4HImode, UNITS_PER_WORD); + rtx outl = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + rtx outh + = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + + if (reg_overlap_mentioned_p (outl, in)) + in = copy_to_mode_reg (SImode, in); + emit_insn (gen_ashlsi3 (outl, in, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outl, outl, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outh, in, GEN_INT (16))); + DONE; +}) + +(define_code_iterator addsub [plus minus]) + +(define_code_iterator alu_binop + [plus minus and ior xor]) + +(define_code_attr insn_opname + [(plus "add") (minus "sub") (mult "mul") (div "div") + (and "and") (ior "ior") (xor "xor")]) + +; The addsi3 / subsi3 do checks that we don't want when splitting V2SImode +; operations into two SImode operations. +(define_code_attr si_pattern_suffix + [(plus "_i") (minus "_i") (and "") (ior "") (xor "")]) + +; You might think that this would work better as a define_expand, but +; again lower_subreg pessimizes the code if it sees individual operations. +; We need to keep inputs and outputs as register pairs if we want to +; get sensible register allocation for double-word load and store operations.
+(define_insn_and_split "v2si3" + [(set (match_operand:V2SI 0 "gpr_operand" "=r") + (alu_binop:V2SI (match_operand:V2SI 1 "gpr_operand" "r") + (match_operand:V2SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(const_int 0)] +{ + rtx o0l, o0h, o1l, o1h, o2l, o2h; + + o0l = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + o0h = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + o1l = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + o1h = simplify_gen_subreg (SImode, operands[1], V2SImode, UNITS_PER_WORD); + o2l = simplify_gen_subreg (SImode, operands[2], V2SImode, 0); + o2h = simplify_gen_subreg (SImode, operands[2], V2SImode, UNITS_PER_WORD); + if (reg_overlap_mentioned_p (o0l, o1h)) + o1h = copy_to_mode_reg (SImode, o1h); + if (reg_overlap_mentioned_p (o0l, o2h)) + o2h = copy_to_mode_reg (SImode, o2h); + emit_insn (gen_si3 (o0l, o1l, o2l)); + emit_insn (gen_si3 (o0h, o1h, o2h)); + DONE; +} + [(set_attr "length" "8")]) + +(define_expand "v2sf3" + [(parallel + [(set (match_operand:V2SF 0 "gpr_operand" "") + (addsub:V2SF (match_operand:V2SF 1 "gpr_operand" "") + (match_operand:V2SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "v2sf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SF 0 "gpr_operand" "=r") + (addsub:V2SF (match_operand:V2SF 1 "gpr_operand" "r") + (match_operand:V2SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 4) (addsub:SF (match_dup 5) (match_dup 6))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)]) + (parallel + [(set (match_dup 7) (addsub:SF (match_dup 8) (match_dup 9))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)])] +{ + operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0); + operands[5] = simplify_gen_subreg (SFmode, operands[1], V2SFmode, 0); + operands[6] = simplify_gen_subreg (SFmode, operands[2], V2SFmode, 0); + operands[7] + = simplify_gen_subreg (SFmode, operands[0], V2SFmode, UNITS_PER_WORD); + operands[8] + = simplify_gen_subreg (SFmode, operands[1], V2SFmode, UNITS_PER_WORD); + operands[9] + = simplify_gen_subreg (SFmode, operands[2], V2SFmode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[4], operands[8])) + operands[8] = copy_to_mode_reg (SFmode, operands[8]); + if (reg_overlap_mentioned_p (operands[4], operands[9])) + operands[9] = copy_to_mode_reg (SFmode, operands[9]); + emit_insn (gen_sf3 (operands[4], operands[5], operands[6])); + emit_insn (gen_sf3 (operands[7], operands[8], operands[9])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[9])); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[11] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "v2fp")]) + +(define_expand "ashlv2si3" + [(parallel + [(set (match_operand:V2SI 0 "gpr_operand" "") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "") + (match_operand:SI 2 "general_operand"))) + (use (match_dup 3)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" +{ + if (const_int_operand (operands[2], VOIDmode)) + operands[3] + = copy_to_mode_reg (SImode, GEN_INT (1 << INTVAL 
(operands[2]))); + else + { + int o, i; + rtx xop[2], last_out = pc_rtx; + + for (o = 0; o <= UNITS_PER_WORD; o += UNITS_PER_WORD) + { + for (i = 0; i < 2; i++) + { + xop[i] + = (i == 2 ? operands[2] + : simplify_gen_subreg (SImode, operands[i], V2SImode, o)); + gcc_assert (!reg_overlap_mentioned_p (last_out, xop[i]) + /* ??? reg_overlap_mentioned_p doesn't understand + about multi-word SUBREGs. */ + || (GET_CODE (last_out) == SUBREG + && GET_CODE (xop[i]) == SUBREG + && SUBREG_REG (last_out) == SUBREG_REG (xop[i]) + && ((SUBREG_BYTE (last_out) & -UNITS_PER_WORD) + != (SUBREG_BYTE (xop[i]) & -UNITS_PER_WORD)))); + } + emit_insn (gen_ashlsi3 (xop[0], xop[1], operands[2])); + last_out = xop[0]; + } + DONE; + } +}) + +(define_insn_and_split "*ashlv2si3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SI 0 "gpr_operand" "=&r,*1*2") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "r,r") + (match_operand 2 "const_int_operand" "n,n"))) + (use (match_operand:SI 4 "gpr_operand" "r,r")) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed" + [(parallel + [(set (match_dup 5) (mult:SI (match_dup 6) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)]) + (parallel + [(set (match_dup 7) (mult:SI (match_dup 8) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)])] +{ + operands[5] = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + operands[6] = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + operands[7] = simplify_gen_subreg (SImode, operands[0], + V2SImode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (SImode, operands[1], + V2SImode, UNITS_PER_WORD); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[4])); + operands[9] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); + rtx insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[5], + gen_rtx_MULT (SImode, operands[6], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[6], operands[2])); + insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[7], + gen_rtx_MULT (SImode, operands[8], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[7], operands[2])); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "fp_int")]) + +(define_expand "mul3" + [(parallel + [(set (match_operand:DWV2MODE 0 "gpr_operand" "") + (mult:DWV2MODE (match_operand:DWV2MODE 1 "gpr_operand" "") + (match_operand:DWV2MODE 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "mul3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:DWV2MODE 0 "gpr_operand" "=r") + (mult:DWV2MODE (match_operand:DWV2MODE 1 "gpr_operand" "r") + (match_operand:DWV2MODE 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 4) (mult: (match_dup 5) (match_dup 6))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)]) + (parallel + [(set 
(match_dup 7) (mult: (match_dup 8) (match_dup 9))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)])] +{ + operands[4] + = simplify_gen_subreg (mode, operands[0], mode, 0); + operands[5] + = simplify_gen_subreg (mode, operands[1], mode, 0); + operands[6] + = simplify_gen_subreg (mode, operands[2], mode, 0); + operands[7] = simplify_gen_subreg (mode, operands[0], + mode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (mode, operands[1], + mode, UNITS_PER_WORD); + operands[9] = simplify_gen_subreg (mode, operands[2], + mode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[4], operands[8])) + operands[8] = copy_to_mode_reg (mode, operands[8]); + if (reg_overlap_mentioned_p (operands[4], operands[9])) + operands[9] = copy_to_mode_reg (mode, operands[9]); + emit_insn (gen_mul3 (operands[4], operands[5], operands[6])); + emit_insn (gen_mul3 (operands[7], operands[8], operands[9])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[9])); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[11] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_insn_and_split "*fmadd_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:DWV2MODE 0 "gpr_operand" "=r") + (plus:DWV2MODE (mult: + (match_operand: 1 "gpr_operand" "r") + (match_operand: 2 "gpr_operand" "r")) + (match_operand: 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD || mode == V2SImode" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 5) + (plus: (mult: (match_dup 6) (match_dup 7)) + (match_dup 8))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 13) + (match_dup 14)]) + (parallel + [(set (match_dup 9) + (plus: (mult: (match_dup 10) (match_dup 11)) + (match_dup 12))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 13) + (match_dup 14)])] +{ + operands[5] + = simplify_gen_subreg (mode, operands[0], mode, 0); + operands[6] + = simplify_gen_subreg (mode, operands[1], mode, 0); + operands[7] + = simplify_gen_subreg (mode, operands[2], mode, 0); + operands[8] + = simplify_gen_subreg (mode, operands[3], mode, 0); + operands[9] = simplify_gen_subreg (mode, operands[0], + mode, UNITS_PER_WORD); + operands[10] = simplify_gen_subreg (mode, operands[1], + mode, UNITS_PER_WORD); + operands[11] = simplify_gen_subreg (mode, operands[2], + mode, UNITS_PER_WORD); + operands[12] = simplify_gen_subreg (mode, operands[3], + mode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[5], operands[10])) + operands[10] = copy_to_mode_reg (mode, operands[10]); + if (reg_overlap_mentioned_p (operands[5], operands[11])) + operands[11] = copy_to_mode_reg (mode, operands[11]); + if (reg_overlap_mentioned_p (operands[5], operands[12])) + operands[12] = copy_to_mode_reg (mode, operands[12]); + emit_insn (gen_madd (operands[5], operands[6], operands[7], + operands[8])); + emit_insn (gen_madd (operands[9], operands[10], operands[11], + operands[12])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[10])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[11])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[12])); + operands[13] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[14] = XVECEXP 
(operands[4], 0, XVECLEN (operands[4], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_expand "vec_set" + [(match_operand:DWV2MODE 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "const_int_operand" "")] + "" +{ + operands[0] + = simplify_gen_subreg (mode, operands[0], mode, + UNITS_PER_WORD * INTVAL (operands[2])); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "movmisalign" + [(set (match_operand:DWV2MODE 0 "nonimmediate_operand" "") + (match_operand:DWV2MODE 1 "general_operand" ""))] + "" +{ + rtx op00, op01, op10, op11; + + op00 = simplify_gen_subreg (mode, operands[0], mode, 0); + op01 = simplify_gen_subreg (mode, operands[0], mode, + UNITS_PER_WORD); + op10 = simplify_gen_subreg (mode, operands[1], mode, 0); + op11 = simplify_gen_subreg (mode, operands[1], mode, + UNITS_PER_WORD); + emit_move_insn (op00, op10); + emit_move_insn (op01, op11); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "flow")]) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.opt b/gcc-4.9/gcc/config/epiphany/epiphany.opt new file mode 100644 index 000000000..2acff323c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.opt @@ -0,0 +1,148 @@ +; Options for the Adapteva EPIPHANY port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; Contributed by Embecosm on behalf of Adapteva, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mhalf-reg-file +Target Mask(HALF_REG_FILE) +Don't use any of r32..r63. + +mprefer-short-insn-regs +Target Mask(PREFER_SHORT_INSN_REGS) +Preferentially allocate registers that allow short instruction generation. + +mbranch-cost= +Target RejectNegative Joined UInteger Var(epiphany_branch_cost) Init(3) +Set branch cost. + +mcmove +Target Mask(CMOVE) +Enable conditional move instruction usage. + +mnops= +Target RejectNegative Joined UInteger Var(epiphany_n_nops) Init(0) +Set number of nops to emit before each insn pattern. + +; Problems with using the flags from fsub for comparison are: +; - Because of underflow (lack of subnormal numbers), different small numbers +; can compare as equal. +; - The set of comparisons is limited, and reversing comparisons doesn't work +; in the presence of NaNs. +; The latter problem might be tolerated with -ffinite-math-only, but nothing +; in -funsafe-math-optimizations says different small numbers may be considered +; equal. +msoft-cmpsf +Target Mask(SOFT_CMPSF) +Use software floating point comparisons. + +msplit-lohi +Target Mask(SPLIT_LOHI) +Enable split of 32 bit immediate loads into low / high part. + +mpost-inc +Target Mask(POST_INC) +Enable use of POST_INC / POST_DEC. + +mpost-modify +Target Mask(POST_MODIFY) +Enable use of POST_MODIFY. + +mstack-offset= +Target RejectNegative Joined UInteger Var(epiphany_stack_offset) Init(EPIPHANY_STACK_OFFSET) +Set number of bytes on the stack preallocated for use by the callee.
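The comment above the msoft-cmpsf option says that deriving comparison results from the flags of an fsub breaks down when subnormal results are flushed to zero: two distinct small numbers can then appear equal. A minimal, host-independent C sketch of that failure mode follows; the constants are arbitrary illustrative values, not taken from the port, and the program only shows that the difference of two unequal normal floats can be subnormal and would therefore flush to zero.

/* Illustration of the -msoft-cmpsf rationale: if subnormal results are
   flushed to zero, an equality test derived from the flags of an fsub
   can report two different small numbers as equal.  */
#include <stdio.h>
#include <float.h>

int main (void)
{
  float x = 3.0e-38f;   /* normal: larger than FLT_MIN (~1.18e-38) */
  float y = 2.5e-38f;   /* a different normal number */
  float diff = x - y;   /* exact result ~5e-39 is subnormal */

  printf ("x == y: %d\n", x == y);            /* 0: the values differ */
  printf ("x - y : %g\n", (double) diff);     /* nonzero with gradual underflow */
  printf ("difference is subnormal: %d\n",
          diff > 0.0f && diff < FLT_MIN);     /* 1: would flush to 0 */
  /* With flush-to-zero, diff would be 0.0, and a flags-based EQ check
     after an fsub would wrongly conclude x == y.  */
  return 0;
}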
+ +mround-nearest +Target Mask(ROUND_NEAREST) +Assume round to nearest is selected for purposes of scheduling. + +mlong-calls +Target Mask(LONG_CALLS) +Generate call insns as indirect calls + +mshort-calls +Target Mask(SHORT_CALLS) +Generate call insns as direct calls + +msmall16 +Target Mask(SMALL16) +Assume labels and symbols can be addressed using 16 bit absolute addresses. + +mfp-mode= +Target RejectNegative Joined Var(epiphany_normal_fp_mode) Enum(attr_fp_mode) Init(FP_MODE_CALLER) + +; The values are from enum attr_fp_mode, but using that enum would bring +; problems with enum forward declarations. +Enum +Name(attr_fp_mode) Type(int) + +EnumValue +Enum(attr_fp_mode) String(caller) Value(FP_MODE_CALLER) + +EnumValue +Enum(attr_fp_mode) String(round-nearest) Value(FP_MODE_ROUND_NEAREST) + +EnumValue +Enum(attr_fp_mode) String(truncate) Value(FP_MODE_ROUND_TRUNC) + +EnumValue +Enum(attr_fp_mode) String(int) Value(FP_MODE_INT) + +may-round-for-trunc +Target Mask(MAY_ROUND_FOR_TRUNC) +A floating point to integer truncation may be replaced with rounding to save mode switching + +mvect-double +Target Mask(VECT_DOUBLE) +Vectorize for double-word operations. + +max-vect-align= +Target RejectNegative Joined Var(epiphany_vect_align) Enum(vect_align) Init(8) + +Enum +Name(vect_align) Type(int) + +EnumValue +Enum(vect_align) String(4) Value(4) + +EnumValue +Enum(vect_align) String(8) Value(8) + +msplit-vecmove-early +Target Mask(SPLIT_VECMOVE_EARLY) +Split unaligned 8 byte vector moves before post-modify address generation. + +mfp-iarith +Target Mask(FP_IARITH) +Use the floating point unit for integer add/subtract. + +m1reg- +Target RejectNegative Joined Var(epiphany_m1reg) Enum(m1reg) Init(-1) +Set register to hold -1. + +Enum +Name(m1reg) Type(int) + +EnumValue +Enum(m1reg) String(none) Value(-1) + +EnumValue +Enum(m1reg) String(r43) Value(43) + +EnumValue +Enum(m1reg) String(r63) Value(63) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h b/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h new file mode 100644 index 000000000..3dd89b0cd --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h @@ -0,0 +1,27 @@ +/* Epiphany intrinsic functions + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +.
*/ + +#define __builtin_epiphany_fmadd(a, b, c) __builtin_fmaf (b, c, a) +#define __builtin_epiphany_fmsub(a, b, c) __builtin_fmaf (-(b), c, a) diff --git a/gcc-4.9/gcc/config/epiphany/mode-switch-use.c b/gcc-4.9/gcc/config/epiphany/mode-switch-use.c new file mode 100644 index 000000000..a0aa24925 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/mode-switch-use.c @@ -0,0 +1,109 @@ +/* Insert USEs in instructions that require mode switching. + This should probably be merged into mode-switching.c . + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "function.h" +#include "emit-rtl.h" +#include "tree-pass.h" +#include "insn-attr.h" +#include "insn-config.h" +#include "recog.h" +#include "tm_p.h" +#include "df.h" + +#ifndef TARGET_INSERT_MODE_SWITCH_USE +#define TARGET_INSERT_MODE_SWITCH_USE NULL +#endif + +static unsigned int +insert_uses (void) +{ + static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES ARRAY_SIZE (num_modes) + int e; + void (*target_insert_mode_switch_use) (rtx insn, int, int) + = TARGET_INSERT_MODE_SWITCH_USE; + + for (e = N_ENTITIES - 1; e >= 0; e--) + { + int no_mode = num_modes[e]; + rtx insn; + int mode; + + if (!OPTIMIZE_MODE_SWITCHING (e)) + continue; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + mode = MODE_NEEDED (e, insn); + if (mode == no_mode) + continue; + if (target_insert_mode_switch_use) + { + target_insert_mode_switch_use (insn, e, mode); + df_insn_rescan (insn); + } + } + } + return 0; +} + +namespace { + +const pass_data pass_data_mode_switch_use = +{ + RTL_PASS, /* type */ + "mode_switch_use", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_mode_switch_use : public rtl_opt_pass +{ +public: + pass_mode_switch_use(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mode_switch_use, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return insert_uses (); } + +}; // class pass_mode_switch_use + +} // anon namespace + +rtl_opt_pass * +make_pass_mode_switch_use (gcc::context *ctxt) +{ + return new pass_mode_switch_use (ctxt); +} diff --git a/gcc-4.9/gcc/config/epiphany/predicates.md b/gcc-4.9/gcc/config/epiphany/predicates.md new file mode 100644 index 000000000..fb8fd88ba --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/predicates.md @@ -0,0 +1,368 @@ +;; Predicate definitions for code generation on the EPIPHANY cpu. +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. +;; +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Returns true iff OP is a symbol reference that is a valid operand +;; in a jump or call instruction. + +(define_predicate "symbolic_operand" + (match_code "symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == SYMBOL_REF) + return (!epiphany_is_long_call_p (op) + && (!flag_pic || SYMBOL_REF_LOCAL_P (op))); + if (GET_CODE (op) == LABEL_REF) + return true; + if (GET_CODE (op) == CONST) + { + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS || !symbolic_operand (XEXP (op, 0), mode)) + return false; + /* The idea here is that a 'small' constant offset should be OK. + What exactly is considered 'small' is a bit arbitrary. */ + return satisfies_constraint_L (XEXP (op, 1)); + } + gcc_unreachable (); +}) + +;; Acceptable arguments to the call insn. + +(define_predicate "call_address_operand" + (ior (match_code "reg") + (match_operand 0 "symbolic_operand"))) + +(define_predicate "call_operand" + (match_code "mem") +{ + op = XEXP (op, 0); + return call_address_operand (op, mode); +}) + +;; general purpose register. +(define_predicate "gpr_operand" + (match_code "reg,subreg") +{ + int regno; + + if (!register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = XEXP (op, 0); + regno = REGNO (op); + return regno >= FIRST_PSEUDO_REGISTER || regno <= 63; +}) + +(define_special_predicate "any_gpr_operand" + (match_code "subreg,reg") +{ + return gpr_operand (op, mode); +}) + +;; register suitable for integer add / sub operations; besides general purpose +;; registers we allow fake hard registers that are eliminated to a real +;; hard register via an offset. +(define_predicate "add_reg_operand" + (match_code "reg,subreg") +{ + int regno; + + if (!register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = XEXP (op, 0); + regno = REGNO (op); + return (regno >= FIRST_PSEUDO_REGISTER || regno <= 63 + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +}) + +;; Also allows suitable constants +(define_predicate "add_operand" + (match_code "reg,subreg,const_int,symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == REG || GET_CODE (op) == SUBREG) + return add_reg_operand (op, mode); + return satisfies_constraint_L (op) || satisfies_constraint_CnL (op); +}) + +;; Ordinary 3rd operand for arithmetic operations +(define_predicate "arith_operand" + (match_code "reg,subreg,const_int,symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == REG || GET_CODE (op) == SUBREG) + return register_operand (op, mode); + return satisfies_constraint_L (op); +}) + +;; Constant integer 3rd operand for arithmetic operations +(define_predicate "arith_int_operand" + (match_code "const_int,symbol_ref,label_ref,const") +{ + return satisfies_constraint_L (op); +}) + +;; Return true if OP is an acceptable argument for a single word move source. 
+ +(define_predicate "move_src_operand" + (match_code + "symbol_ref,label_ref,const,const_int,const_double,reg,subreg,mem,unspec") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return immediate_operand (op, mode); + case CONST_DOUBLE : + /* SImode constants should always fit into a CONST_INT. Large + unsigned 32-bit constants are represented as negative CONST_INTs. */ + gcc_assert (GET_MODE (op) != SImode); + /* We can handle 32-bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return op != frame_pointer_rtx && register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + case UNSPEC: + return satisfies_constraint_Sra (op); + default : + return 0; + } +}) + +;; Return true if OP is an acceptable argument for a double word move source. + +(define_predicate "move_double_src_operand" + (match_code "reg,subreg,mem,const_int,const_double,const_vector") +{ + if (GET_CODE (op) == MEM && misaligned_operand (op, mode) + && !address_operand (plus_constant (Pmode, XEXP (op, 0), 4), SImode)) + return 0; + return general_operand (op, mode); +}) + +;; Return true if OP is an acceptable argument for a move destination. + +(define_predicate "move_dest_operand" + (match_code "reg,subreg,mem") +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + { + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + } + else + { + return register_operand (op, mode); + } + case MEM : + if (GET_MODE_SIZE (mode) == 8 && misaligned_operand (op, mode) + && !address_operand (plus_constant (Pmode, XEXP (op, 0), 4), SImode)) + return 0; + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +}) + +(define_special_predicate "stacktop_operand" + (match_code "mem") +{ + if (mode != VOIDmode && GET_MODE (op) != mode) + return false; + return rtx_equal_p (XEXP (op, 0), stack_pointer_rtx); +}) + +;; Return 1 if OP is a comparison operator valid for the mode of CC. +;; This allows the use of MATCH_OPERATOR to recognize all the branch insns. +;; +;; Some insns only set a few bits in the condition code. So only allow those +;; comparisons that use the bits that are valid. + +(define_predicate "proper_comparison_operator" + (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt") +{ + enum rtx_code code = GET_CODE (op); + rtx cc = XEXP (op, 0); + + /* combine can try strange things. 
*/ + if (!REG_P (cc)) + return 0; + switch (GET_MODE (cc)) + { + case CC_Zmode: + case CC_N_NEmode: + case CC_FP_EQmode: + return REGNO (cc) == CC_REGNUM && (code == EQ || code == NE); + case CC_C_LTUmode: + return REGNO (cc) == CC_REGNUM && (code == LTU || code == GEU); + case CC_C_GTUmode: + return REGNO (cc) == CC_REGNUM && (code == GTU || code == LEU); + case CC_FPmode: + return (REGNO (cc) == CCFP_REGNUM + && (code == EQ || code == NE || code == LT || code == LE)); + case CC_FP_GTEmode: + return (REGNO (cc) == CC_REGNUM + && (code == EQ || code == NE || code == GT || code == GE + || code == UNLE || code == UNLT)); + case CC_FP_ORDmode: + return REGNO (cc) == CC_REGNUM && (code == ORDERED || code == UNORDERED); + case CC_FP_UNEQmode: + return REGNO (cc) == CC_REGNUM && (code == UNEQ || code == LTGT); + case CCmode: + return REGNO (cc) == CC_REGNUM; + /* From combiner. */ + case QImode: case SImode: case SFmode: case HImode: + /* From cse.c:dead_libcall_p. */ + case DFmode: + return 0; + default: + gcc_unreachable (); + } +}) + +(define_predicate "addsub_operator" + (match_code "plus, minus")) + +(define_predicate "cc_operand" + (and (match_code "reg") + (match_test "REGNO (op) == CC_REGNUM || REGNO (op) == CCFP_REGNUM"))) + +(define_predicate "const0_operand" + (match_code "const_int, const_double") +{ + if (mode == VOIDmode) + mode = GET_MODE (op); + return op == CONST0_RTX (mode); +}) + +(define_predicate "const_float_1_operand" + (match_code "const_double") +{ + return op == CONST1_RTX (mode); +}) + +(define_predicate "cc_move_operand" + (and (match_code "reg") + (ior (match_test "REGNO (op) == CC_REGNUM") + (match_test "gpr_operand (op, mode)")))) + +(define_predicate "float_operation" + (match_code "parallel") +{ + /* Most patterns start out with one SET and one CLOBBER, and gain a USE + or two of FP_NEAREST_REGNUM / FP_TRUNCATE_REGNUM / FP_ANYFP_REGNUM + after mode switching. The longer patterns are + all beyond length 4, and before mode switching, end with a + CLOBBER of CCFP_REGNUM. */ + int count = XVECLEN (op, 0); + bool inserted = MACHINE_FUNCTION (cfun)->control_use_inserted; + int i; + + if (count == 2 + /* Vector ashift has an extra use for the constant factor required to + implement the shift as multiply. */ + || (count == 3 && GET_CODE (XVECEXP (op, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (op, 0, 0), 1)) == ASHIFT)) + return !inserted; + + /* combine / recog will pass any old garbage here before checking the + rest of the insn. */ + if (count <= 3) + return false; + + i = 1; + if (count > 4) + for (i = 2; i < count; i++) + { + rtx x = XVECEXP (op, 0, i); + + if (GET_CODE (x) == CLOBBER) + { + if (!REG_P (XEXP (x, 0))) + return false; + if (REGNO (XEXP (x, 0)) == CCFP_REGNUM) + { + if (count == i + 1) + return !inserted; + break; + } + /* Just an ordinary clobber, keep looking. 
*/ + else if (GET_CODE (x) == USE + || (GET_CODE (x) == SET && i == 2)) + continue; + else + return false; + } + if (count != i + 3 || !inserted) + return false; + for (i = i+1; i < count; i++) + { + rtx x = XVECEXP (op, 0, i); + + if (GET_CODE (x) != USE && GET_CODE (x) != CLOBBER) + return false; + x = XEXP (x, 0); + if (!REG_P (x) + || (REGNO (x) != FP_NEAREST_REGNUM + && REGNO (x) != FP_TRUNCATE_REGNUM + && REGNO (x) != FP_ANYFP_REGNUM)) + return false; + } + return true; +}) + +(define_predicate "set_fp_mode_operand" + (ior (match_test "gpr_operand (op, mode)") + (and (match_code "const") + (match_test "satisfies_constraint_Cfm (op)")))) + +(define_predicate "post_modify_address" + (match_code "post_modify,post_inc,post_dec")) + +(define_predicate "post_modify_operand" + (and (match_code "mem") + (match_test "post_modify_address (XEXP (op, 0), Pmode)"))) + +(define_predicate "nonsymbolic_immediate_operand" + (ior (match_test "immediate_operand (op, mode)") + (match_code "const_vector"))) /* Is this specific enough? */ + +;; Return true if OP is misaligned memory operand +(define_predicate "misaligned_operand" + (and (match_code "mem") + (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)"))) diff --git a/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c b/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c new file mode 100644 index 000000000..16849182c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c @@ -0,0 +1,200 @@ +/* Mode switching cleanup pass for the EPIPHANY cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "machmode.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "tm_p.h" +#include "vec.h" +#include "sbitmap.h" +#include "basic-block.h" +#include "df.h" +#include "rtl.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "emit-rtl.h" +#include "recog.h" +#include "function.h" +#include "insn-attr-common.h" +#include "tree-pass.h" + +/* Clean-up after mode switching: + Check for mode setting insns that have FP_MODE_ROUND_UNKNOWN. + If only one rounding mode is required, select that one. + Else we have to choose one to use in this mode setting insn and + insert new mode setting insns on the edges where the other mode + becomes unambiguous.
*/ + +static bool +gate_resolve_sw_modes (void) +{ + return optimize; +} + +static unsigned +resolve_sw_modes (void) +{ + basic_block bb; + rtx insn, src; + vec todo; + sbitmap pushed; + bool need_commit = false; + bool finalize_fp_sets = (MACHINE_FUNCTION (cfun)->unknown_mode_sets == 0); + + todo.create (last_basic_block_for_fn (cfun)); + pushed = sbitmap_alloc (last_basic_block_for_fn (cfun)); + bitmap_clear (pushed); + if (!finalize_fp_sets) + { + df_note_add_problem (); + df_analyze (); + } + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + enum attr_fp_mode selected_mode; + + if (!NONJUMP_INSN_P (insn) + || recog_memoized (insn) != CODE_FOR_set_fp_mode) + continue; + src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + if (finalize_fp_sets) + { + SET_SRC (XVECEXP (PATTERN (insn), 0, 2)) = copy_rtx (src); + if (REG_P (src)) + df_insn_rescan (insn); + continue; + } + if (REG_P (src) + || XINT (XVECEXP (XEXP (src, 0), 0, 0), 0) != FP_MODE_ROUND_UNKNOWN) + continue; + if (find_regno_note (insn, REG_UNUSED, FP_TRUNCATE_REGNUM)) + selected_mode = FP_MODE_ROUND_NEAREST; + else if (find_regno_note (insn, REG_UNUSED, FP_NEAREST_REGNUM)) + selected_mode = FP_MODE_ROUND_TRUNC; + else + { + /* We could get more fancy in the selection of the mode by + checking the total frequency of the affected edges. */ + selected_mode = (enum attr_fp_mode) epiphany_normal_fp_rounding; + + todo.quick_push (bb); + bitmap_set_bit (pushed, bb->index); + } + XVECEXP (XEXP (src, 0), 0, 0) = GEN_INT (selected_mode); + SET_SRC (XVECEXP (PATTERN (insn), 0, 1)) = copy_rtx (src); + SET_SRC (XVECEXP (PATTERN (insn), 0, 2)) = copy_rtx (src); + df_insn_rescan (insn); + } + while (todo.length ()) + { + basic_block bb = todo.pop (); + int selected_reg, jilted_reg; + enum attr_fp_mode jilted_mode; + edge e; + edge_iterator ei; + + bitmap_set_bit (pushed, bb->index); + bitmap_set_bit (pushed, bb->index); + + if (epiphany_normal_fp_rounding == FP_MODE_ROUND_NEAREST) + { + selected_reg = FP_NEAREST_REGNUM; + jilted_reg = FP_TRUNCATE_REGNUM; + jilted_mode = FP_MODE_ROUND_TRUNC; + } + else + { + selected_reg = FP_TRUNCATE_REGNUM; + jilted_reg = FP_NEAREST_REGNUM; + jilted_mode = FP_MODE_ROUND_NEAREST; + } + + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = e->dest; + rtx seq; + + if (!REGNO_REG_SET_P (DF_LIVE_IN (succ), jilted_reg)) + continue; + if (REGNO_REG_SET_P (DF_LIVE_IN (succ), selected_reg)) + { + if (bitmap_bit_p (pushed, succ->index)) + continue; + todo.quick_push (succ); + bitmap_set_bit (pushed, bb->index); + continue; + } + start_sequence (); + emit_set_fp_mode (EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN, + jilted_mode, NULL); + seq = get_insns (); + end_sequence (); + need_commit = true; + insert_insn_on_edge (seq, e); + } + } + todo.release (); + sbitmap_free (pushed); + if (need_commit) + commit_edge_insertions (); + return 0; +} + +namespace { + +const pass_data pass_data_resolve_sw_modes = +{ + RTL_PASS, /* type */ + "resolve_sw_modes", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MODE_SWITCH, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ +}; + +class pass_resolve_sw_modes : public rtl_opt_pass +{ +public: + pass_resolve_sw_modes(gcc::context *ctxt) + : rtl_opt_pass(pass_data_resolve_sw_modes, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_resolve_sw_modes (); } + 
unsigned int execute () { return resolve_sw_modes (); } + +}; // class pass_resolve_sw_modes + +} // anon namespace + +rtl_opt_pass * +make_pass_resolve_sw_modes (gcc::context *ctxt) +{ + return new pass_resolve_sw_modes (ctxt); +} diff --git a/gcc-4.9/gcc/config/epiphany/t-epiphany b/gcc-4.9/gcc/config/epiphany/t-epiphany new file mode 100644 index 000000000..7a329dad7 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/t-epiphany @@ -0,0 +1,38 @@ +# Copyright (C) 1997-2014 Free Software Foundation, Inc. +# Contributed by Embecosm on behalf of Adapteva, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +mode-switch-use.o : $(srcdir)/config/epiphany/mode-switch-use.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) \ + $(TREE_PASS_H) $(INSN_ATTR_H) $(EMIT_RTL_H) $(FUNCTION_H) $(RECOG_H) \ + insn-config.h $(DF_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $< + +resolve-sw-modes.o : $(srcdir)/config/epiphany/resolve-sw-modes.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(MACHMODE_H) $(TM_H) hard-reg-set.h \ + $(TM_P_H) $(VEC_H) sbitmap.h $(BASIC_BLOCK_H) $(DF_H) $(RTL_H) \ + insn-config.h insn-codes.h $(EMIT_RTL_H) $(RECOG_H) $(FUNCTION_H) \ + insn-attr-common.h $(TREE_PASS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $< + +SPECS = specs.install + +specs: specs.install + sed -e 's,epiphany_library_extra_spec,epiphany_library_stub_spec,' \ + -e 's,epiphany_library_build_spec,epiphany_library_extra_spec,' \ + < specs.install > $@ ; \ diff --git a/gcc-4.9/gcc/config/flat.h b/gcc-4.9/gcc/config/flat.h new file mode 100644 index 000000000..3af4e57e6 --- /dev/null +++ b/gcc-4.9/gcc/config/flat.h @@ -0,0 +1,22 @@ +/* Defines to be used for targets that support flat executables. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This macro applies on top of OBJECT_FORMAT_ELF and indicates that + we want to support both flat and ELF output. */ +#define OBJECT_FORMAT_FLAT diff --git a/gcc-4.9/gcc/config/fr30/constraints.md b/gcc-4.9/gcc/config/fr30/constraints.md new file mode 100644 index 000000000..dc8fa77d1 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/constraints.md @@ -0,0 +1,71 @@ +;; Constraint definitions for the FR30. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. +(define_register_constraint "d" "MULTIPLY_64_REG" + "The MDH,MDL register pair as used by MUL and MULU.") + +(define_register_constraint "e" "MULTIPLY_32_REG" + "The MDL register as used by MULH and MULUH.") + +(define_register_constraint "h" "HIGH_REGS" + "Registers 8 through 15.") + +(define_register_constraint "l" "LOW_REGS" + "Registers 0 through 7.") + +(define_register_constraint "a" "ALL_REGS" + "@internal") + +;; Integer constraints. +(define_constraint "I" + "An integer in the range 0 to 15." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 15)"))) + +(define_constraint "J" + "An integer in the range -16 to -1." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -16, -1)"))) + +(define_constraint "K" + "An integer in the range 16 to 31." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 16, 31)"))) + +(define_constraint "L" + "An integer in the range 0 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "M" + "An integer in the range 0 to 1048575." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 1048575)"))) + +(define_constraint "P" + "An integer in the range -256 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -256, 255)"))) + +;; Extra constraints. +(define_constraint "Q" + "@internal" + (and (match_code "mem") + (match_code "symbol_ref" "0"))) diff --git a/gcc-4.9/gcc/config/fr30/fr30-protos.h b/gcc-4.9/gcc/config/fr30/fr30-protos.h new file mode 100644 index 000000000..a2a7d793f --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30-protos.h @@ -0,0 +1,32 @@ +/* Prototypes for fr30.c functions used in the md file & elsewhere. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +extern void fr30_expand_prologue (void); +extern void fr30_expand_epilogue (void); +extern unsigned int fr30_compute_frame_size (int, int); + +#ifdef RTX_CODE +extern int fr30_check_multiple_regs (rtx *, int, int); +extern void fr30_print_operand (FILE *, rtx, int); +extern void fr30_print_operand_address (FILE *, rtx); +extern rtx fr30_move_double (rtx *); +#ifdef HAVE_MACHINE_MODES +extern int fr30_const_double_is_zero (rtx); +#endif /* HAVE_MACHINE_MODES */ +#endif /* RTX_CODE */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.c b/gcc-4.9/gcc/config/fr30/fr30.c new file mode 100644 index 000000000..65084f69c --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.c @@ -0,0 +1,1062 @@ +/* FR30 specific functions. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/*{{{ Includes */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "output.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "df.h" +#include "diagnostic-core.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" + +/*}}}*/ +/*{{{ Function Prologues & Epilogues */ + +/* The FR30 stack looks like this: + + Before call After call + FP ->| | | | + +-----------------------+ +-----------------------+ high + | | | | memory + | local variables, | | local variables, | + | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | args to the func that | | args to this func. | + | is being called that | | | + SP ->| do not fit in regs | | | + +-----------------------+ +-----------------------+ + | args that used to be | \ + | in regs; only created | | pretend_size + AP-> | for vararg funcs | / + +-----------------------+ + | | \ + | register save area | | + | | | + +-----------------------+ | reg_size + | return address | | + +-----------------------+ | + FP ->| previous frame ptr | / + +-----------------------+ + | | \ + | local variables | | var_size + | | / + +-----------------------+ + | | \ + low | room for args to | | + memory | other funcs called | | args_size + | from this one | | + SP ->| | / + +-----------------------+ + + Note, AP is a fake hard register. It will be eliminated in favor of + SP or FP as appropriate. + + Note, Some or all of the stack sections above may be omitted if they + are not needed. */ + +/* Structure to be filled in by fr30_compute_frame_size() with register + save masks, and offsets for the current function. */ +struct fr30_frame_info +{ + unsigned int total_size; /* # Bytes that the entire frame takes up. 
*/ + unsigned int pretend_size; /* # Bytes we push and pretend caller did. */ + unsigned int args_size; /* # Bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # Bytes needed to store regs. */ + unsigned int var_size; /* # Bytes that variables take up. */ + unsigned int frame_size; /* # Bytes in current frame. */ + unsigned int gmask; /* Mask of saved registers. */ + unsigned int save_fp; /* Nonzero if frame pointer must be saved. */ + unsigned int save_rp; /* Nonzero if return pointer must be saved. */ + int initialised; /* Nonzero if frame size already calculated. */ +}; + +/* Current frame information calculated by fr30_compute_frame_size(). */ +static struct fr30_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct fr30_frame_info zero_frame_info; + +static void fr30_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree, int *, int); +static bool fr30_must_pass_in_stack (enum machine_mode, const_tree); +static int fr30_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx fr30_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static void fr30_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static bool fr30_frame_pointer_required (void); +static rtx fr30_function_value (const_tree, const_tree, bool); +static rtx fr30_libcall_value (enum machine_mode, const_rtx); +static bool fr30_function_value_regno_p (const unsigned int); +static bool fr30_can_eliminate (const int, const int); +static void fr30_asm_trampoline_template (FILE *); +static void fr30_trampoline_init (rtx, tree, rtx); +static int fr30_num_arg_regs (enum machine_mode, const_tree); + +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_POINTER_MASK (1 << (RETURN_POINTER_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno) \ + ( (regno) != RETURN_POINTER_REGNUM \ + && (regno) != FRAME_POINTER_REGNUM \ + && df_regs_ever_live_p (regno) \ + && ! call_used_regs [regno] ) + +#define MUST_SAVE_FRAME_POINTER (df_regs_ever_live_p (FRAME_POINTER_REGNUM) || frame_pointer_needed) +#define MUST_SAVE_RETURN_POINTER (df_regs_ever_live_p (RETURN_POINTER_REGNUM) || crtl->profile) + +#if UNITS_PER_WORD == 4 +#define WORD_ALIGN(SIZE) (((SIZE) + 3) & ~3) +#endif + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES fr30_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG fr30_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE fr30_function_arg_advance + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE fr30_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE fr30_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P fr30_function_value_regno_p + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS fr30_setup_incoming_varargs +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK fr30_must_pass_in_stack + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED fr30_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE fr30_can_eliminate + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE fr30_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT fr30_trampoline_init + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +bool +fr30_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == FRAME_POINTER_REGNUM || ! frame_pointer_needed); +} + +/* Returns the number of bytes offset between FROM_REG and TO_REG + for the current function. As a side effect it fills in the + current_frame_info structure, if the data is available. */ +unsigned int +fr30_compute_frame_size (int from_reg, int to_reg) +{ + int regno; + unsigned int return_value; + unsigned int var_size; + unsigned int args_size; + unsigned int pretend_size; + unsigned int reg_size; + unsigned int gmask; + + var_size = WORD_ALIGN (get_frame_size ()); + args_size = WORD_ALIGN (crtl->outgoing_args_size); + pretend_size = crtl->args.pretend_args_size; + + reg_size = 0; + gmask = 0; + + /* Calculate space needed for registers. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) + { + if (MUST_SAVE_REGISTER (regno)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + current_frame_info.save_fp = MUST_SAVE_FRAME_POINTER; + current_frame_info.save_rp = MUST_SAVE_RETURN_POINTER; + + reg_size += (current_frame_info.save_fp + current_frame_info.save_rp) + * UNITS_PER_WORD; + + /* Save computed information. */ + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + current_frame_info.frame_size = args_size + var_size; + current_frame_info.total_size = args_size + var_size + reg_size + pretend_size; + current_frame_info.gmask = gmask; + current_frame_info.initialised = reload_completed; + + /* Calculate the required distance. */ + return_value = 0; + + if (to_reg == STACK_POINTER_REGNUM) + return_value += args_size + var_size; + + if (from_reg == ARG_POINTER_REGNUM) + return_value += reg_size; + + return return_value; +} + +/* Called after register allocation to add any instructions needed for the + prologue. 
Using a prologue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +fr30_expand_prologue (void) +{ + int regno; + rtx insn; + + if (! current_frame_info.initialised) + fr30_compute_frame_size (0, 0); + + /* This cases shouldn't happen. Catch it now. */ + gcc_assert (current_frame_info.total_size || !current_frame_info.gmask); + + /* Allocate space for register arguments if this is a variadic function. */ + if (current_frame_info.pretend_size) + { + int regs_to_save = current_frame_info.pretend_size / UNITS_PER_WORD; + + /* Push argument registers into the pretend arg area. */ + for (regno = FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS; regno --, regs_to_save --;) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (current_frame_info.gmask) + { + /* Save any needed call-saved regs. */ + for (regno = STACK_POINTER_REGNUM; regno--;) + { + if ((current_frame_info.gmask & (1 << regno)) != 0) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Save return address if necessary. */ + if (current_frame_info.save_rp) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, + RETURN_POINTER_REGNUM))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save old frame pointer and create new one, if necessary. */ + if (current_frame_info.save_fp) + { + if (current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + { + int enter_size = current_frame_info.frame_size + UNITS_PER_WORD; + rtx pattern; + + insn = emit_insn (gen_enter_func (GEN_INT (enter_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + pattern = PATTERN (insn); + + /* Also mark all 3 subexpressions as RTX_FRAME_RELATED_P. */ + if (GET_CODE (pattern) == PARALLEL) + { + int x; + for (x = XVECLEN (pattern, 0); x--;) + { + rtx part = XVECEXP (pattern, 0, x); + + /* One of the insns in the ENTER pattern updates the + frame pointer. If we do not actually need the frame + pointer in this function then this is a side effect + rather than a desired effect, so we do not mark that + insn as being related to the frame set up. Doing this + allows us to compile the crash66.C test file in the + G++ testsuite. */ + if (! frame_pointer_needed + && GET_CODE (part) == SET + && SET_DEST (part) == hard_frame_pointer_rtx) + RTX_FRAME_RELATED_P (part) = 0; + else + RTX_FRAME_RELATED_P (part) = 1; + } + } + } + else + { + insn = emit_insn (gen_movsi_push (frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_pointer_needed) + { + insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Allocate the stack frame. */ + if (current_frame_info.frame_size == 0) + ; /* Nothing to do. */ + else if (current_frame_info.save_fp + && current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + ; /* Nothing to do. 
*/ + else if (current_frame_info.frame_size <= 512) + { + insn = emit_insn (gen_add_to_stack + (GEN_INT (- (signed) current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + insn = emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->profile) + emit_insn (gen_blockage ()); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using an epilogue insn is favored compared to putting all of the + instructions in output_function_epilogue(), since it allows the scheduler + to intermix instructions with the restores of the caller saved registers. + In some cases, it might be necessary to emit a barrier instruction as the + first insn to prevent such scheduling. */ +void +fr30_expand_epilogue (void) +{ + int regno; + + /* Perform the inversion operations of the prologue. */ + gcc_assert (current_frame_info.initialised); + + /* Pop local variables and arguments off the stack. + If frame_pointer_needed is TRUE then the frame pointer register + has actually been used as a frame pointer, and we can recover + the stack pointer from it, otherwise we must unwind the stack + manually. */ + if (current_frame_info.frame_size > 0) + { + if (current_frame_info.save_fp && frame_pointer_needed) + { + emit_insn (gen_leave_func ()); + current_frame_info.save_fp = 0; + } + else if (current_frame_info.frame_size <= 508) + emit_insn (gen_add_to_stack + (GEN_INT (current_frame_info.frame_size))); + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + } + } + + if (current_frame_info.save_fp) + emit_insn (gen_movsi_pop (frame_pointer_rtx)); + + /* Pop all the registers that were pushed. */ + if (current_frame_info.save_rp) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, RETURN_POINTER_REGNUM))); + + for (regno = 0; regno < STACK_POINTER_REGNUM; regno ++) + if (current_frame_info.gmask & (1 << regno)) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno))); + + if (current_frame_info.pretend_size) + emit_insn (gen_add_to_stack (GEN_INT (current_frame_info.pretend_size))); + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + + emit_jump_insn (gen_return_from_func ()); +} + +/* Do any needed setup for a variadic function. We must create a register + parameter block, and then copy any anonymous arguments, plus the last + named argument, from registers into memory. * copying actually done in + fr30_expand_prologue(). + + ARG_REGS_USED_SO_FAR has *not* been updated for the last named argument + which has type TYPE and mode MODE, and we rely on this fact. */ +void +fr30_setup_incoming_varargs (cumulative_args_t arg_regs_used_so_far_v, + enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *arg_regs_used_so_far + = get_cumulative_args (arg_regs_used_so_far_v); + int size; + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + /* ??? This run-time test as well as the code inside the if + statement is probably unnecessary. 
*/ + if (targetm.calls.strict_argument_naming (arg_regs_used_so_far_v)) + /* If TARGET_STRICT_ARGUMENT_NAMING returns true, then the last named + arg must not be treated as an anonymous arg. */ + /* ??? This is a pointer increment, which makes no sense. */ + arg_regs_used_so_far += fr30_num_arg_regs (mode, type); + + size = FR30_NUM_ARG_REGS - (* arg_regs_used_so_far); + + if (size <= 0) + return; + + * pretend_size = (size * UNITS_PER_WORD); +} + +/*}}}*/ +/*{{{ Printing operands */ + +/* Print a memory address as an operand to reference that memory location. */ + +void +fr30_print_operand_address (FILE *stream, rtx address) +{ + switch (GET_CODE (address)) + { + case SYMBOL_REF: + output_addr_const (stream, address); + break; + + default: + fprintf (stderr, "code = %x\n", GET_CODE (address)); + debug_rtx (address); + output_operand_lossage ("fr30_print_operand_address: unhandled address"); + break; + } +} + +/* Print an operand. */ + +void +fr30_print_operand (FILE *file, rtx x, int code) +{ + rtx x0; + + switch (code) + { + case '#': + /* Output a :D if this instruction is delayed. */ + if (dbr_sequence_length () != 0) + fputs (":D", file); + return; + + case 'p': + /* Compute the register name of the second register in a hi/lo + register pair. */ + if (GET_CODE (x) != REG) + output_operand_lossage ("fr30_print_operand: unrecognized %%p code"); + else + fprintf (file, "r%d", REGNO (x) + 1); + return; + + case 'b': + /* Convert GCC's comparison operators into FR30 comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "eq"); break; + case NE: fprintf (file, "ne"); break; + case LT: fprintf (file, "lt"); break; + case LE: fprintf (file, "le"); break; + case GT: fprintf (file, "gt"); break; + case GE: fprintf (file, "ge"); break; + case LTU: fprintf (file, "c"); break; + case LEU: fprintf (file, "ls"); break; + case GTU: fprintf (file, "hi"); break; + case GEU: fprintf (file, "nc"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%b code"); + break; + } + return; + + case 'B': + /* Convert GCC's comparison operators into the complimentary FR30 + comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "ne"); break; + case NE: fprintf (file, "eq"); break; + case LT: fprintf (file, "ge"); break; + case LE: fprintf (file, "gt"); break; + case GT: fprintf (file, "le"); break; + case GE: fprintf (file, "lt"); break; + case LTU: fprintf (file, "nc"); break; + case LEU: fprintf (file, "hi"); break; + case GTU: fprintf (file, "ls"); break; + case GEU: fprintf (file, "c"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%B code"); + break; + } + return; + + case 'A': + /* Print a signed byte value as an unsigned value. */ + if (GET_CODE (x) != CONST_INT) + output_operand_lossage ("fr30_print_operand: invalid operand to %%A code"); + else + { + HOST_WIDE_INT val; + + val = INTVAL (x); + + val &= 0xff; + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); + } + return; + + case 'x': + if (GET_CODE (x) != CONST_INT + || INTVAL (x) < 16 + || INTVAL (x) > 32) + output_operand_lossage ("fr30_print_operand: invalid %%x code"); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) - 16); + return; + + case 'F': + if (GET_CODE (x) != CONST_DOUBLE) + output_operand_lossage ("fr30_print_operand: invalid %%F code"); + else + { + char str[30]; + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), + sizeof (str), 0, 1); + fputs (str, file); + } + return; + + case 0: + /* Handled below. 
*/ + break; + + default: + fprintf (stderr, "unknown code = %x\n", code); + output_operand_lossage ("fr30_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + x0 = XEXP (x,0); + + switch (GET_CODE (x0)) + { + case REG: + gcc_assert ((unsigned) REGNO (x0) < ARRAY_SIZE (reg_names)); + fprintf (file, "@%s", reg_names [REGNO (x0)]); + break; + + case PLUS: + if (GET_CODE (XEXP (x0, 0)) != REG + || REGNO (XEXP (x0, 0)) < FRAME_POINTER_REGNUM + || REGNO (XEXP (x0, 0)) > STACK_POINTER_REGNUM + || GET_CODE (XEXP (x0, 1)) != CONST_INT) + { + fprintf (stderr, "bad INDEXed address:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + else if (REGNO (XEXP (x0, 0)) == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < -(1 << 9) || val > ((1 << 9) - 4)) + { + fprintf (stderr, "frame INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r14, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + else + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < 0 || val > ((1 << 6) - 4)) + { + fprintf (stderr, "stack INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r15, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + break; + + case SYMBOL_REF: + output_address (x0); + break; + + default: + fprintf (stderr, "bad MEM code = %x\n", GET_CODE (x0)); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + break; + } + break; + + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + + /* Fall through. Let output_addr_const deal with it. */ + default: + output_addr_const (file, x); + break; + } + + return; +} + +/*}}}*/ + +/* Implements TARGET_FUNCTION_VALUE. */ + +static rtx +fr30_function_value (const_tree valtype, + const_tree fntype_or_decli ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_LIBCALL_VALUE. */ + +static rtx +fr30_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +fr30_function_value_regno_p (const unsigned int regno) +{ + return (regno == RETURN_VALUE_REGNUM); +} + +/*{{{ Function arguments */ + +/* Return true if we should pass an argument on the stack rather than + in registers. */ + +static bool +fr30_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (mode == BLKmode) + return true; + if (type == NULL) + return false; + return AGGREGATE_TYPE_P (type); +} + +/* Compute the number of word sized registers needed to hold a + function argument of mode INT_MODE and tree type TYPE. 
*/ +static int +fr30_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (targetm.calls.must_pass_in_stack (mode, type)) + return 0; + + if (type && mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +} + +/* Returns the number of bytes in which *part* of a parameter of machine + mode MODE and tree type TYPE (which may be NULL if the type is not known). + If the argument fits entirely in the argument registers, or entirely on + the stack, then 0 is returned. + CUM is the number of argument registers already used by earlier + parameters to the function. */ + +static int +fr30_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, + tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + /* Unnamed arguments, i.e. those that are prototyped as ... + are always passed on the stack. + Also check here to see if all the argument registers are full. */ + if (named == 0 || *cum >= FR30_NUM_ARG_REGS) + return 0; + + /* Work out how many argument registers would be needed if this + parameter were to be passed entirely in registers. If there + are sufficient argument registers available (or if no registers + are needed because the parameter must be passed on the stack) + then return zero, as this parameter does not require partial + register, partial stack space. */ + if (*cum + fr30_num_arg_regs (mode, type) <= FR30_NUM_ARG_REGS) + return 0; + + return (FR30_NUM_ARG_REGS - *cum) * UNITS_PER_WORD; +} + +static rtx +fr30_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + if (!named + || fr30_must_pass_in_stack (mode, type) + || *cum >= FR30_NUM_ARG_REGS) + return NULL_RTX; + else + return gen_rtx_REG (mode, *cum + FIRST_ARG_REGNUM); +} + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG', etc. + + This macro need not do anything if the argument in question was passed on + the stack. The compiler knows how to track the amount of stack space used + for arguments without any special help. */ +static void +fr30_function_arg_advance (cumulative_args_t cum, enum machine_mode mode, + const_tree type, bool named) +{ + *get_cumulative_args (cum) += named * fr30_num_arg_regs (mode, type); +} + +/*}}}*/ +/*{{{ Operand predicates */ + +#ifndef Mmode +#define Mmode enum machine_mode +#endif + +/* Returns true iff all the registers in the operands array + are in descending or ascending order.
int +fr30_check_multiple_regs (rtx *operands, int num_operands, int descending) +{ + if (descending) + { + unsigned int prev_regno = 0; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) < prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + else + { + unsigned int prev_regno = CONDITION_CODE_REGNUM; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) > prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + + return 1; +} + +int +fr30_const_double_is_zero (rtx operand) +{ + REAL_VALUE_TYPE d; + + if (operand == NULL || GET_CODE (operand) != CONST_DOUBLE) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (d, operand); + + return REAL_VALUES_EQUAL (d, dconst0); +} + +/*}}}*/ +/*{{{ Instruction Output Routines */ + +/* Output a double word move. + It must be REG<-REG, REG<-MEM, MEM<-REG or REG<-CONST. + On the FR30 we are constrained by the fact that it does not + support offsetable addresses, and so we have to load the + address of the second word into the second destination register + before we can use it. */ + +rtx +fr30_move_double (rtx * operands) +{ + rtx src = operands[1]; + rtx dest = operands[0]; + enum rtx_code src_code = GET_CODE (src); + enum rtx_code dest_code = GET_CODE (dest); + enum machine_mode mode = GET_MODE (dest); + rtx val; + + start_sequence (); + + if (dest_code == REG) + { + if (src_code == REG) + { + int reverse = (REGNO (dest) == REGNO (src) + 1); + + /* We normally copy the low-numbered register first. However, if + the first register of operand 0 is the same as the second register + of operand 1, we must copy in the opposite order. */ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + else if (src_code == MEM) + { + rtx addr = XEXP (src, 0); + rtx dest0 = operand_subword (dest, 0, TRUE, mode); + rtx dest1 = operand_subword (dest, 1, TRUE, mode); + rtx new_mem; + + gcc_assert (GET_CODE (addr) == REG); + + /* Copy the address before clobbering it. See PR 34174.
*/ + emit_insn (gen_rtx_SET (SImode, dest1, addr)); + emit_insn (gen_rtx_SET (VOIDmode, dest0, + adjust_address (src, SImode, 0))); + emit_insn (gen_rtx_SET (SImode, dest1, + plus_constant (SImode, dest1, + UNITS_PER_WORD))); + + new_mem = gen_rtx_MEM (SImode, dest1); + MEM_COPY_ATTRIBUTES (new_mem, src); + + emit_insn (gen_rtx_SET (VOIDmode, dest1, new_mem)); + } + else if (src_code == CONST_INT || src_code == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + } + else if (src_code == REG && dest_code == MEM) + { + rtx addr = XEXP (dest, 0); + rtx src0; + rtx src1; + + gcc_assert (GET_CODE (addr) == REG); + + src0 = operand_subword (src, 0, TRUE, mode); + src1 = operand_subword (src, 1, TRUE, mode); + + emit_move_insn (adjust_address (dest, SImode, 0), src0); + + if (REGNO (addr) == STACK_POINTER_REGNUM + || REGNO (addr) == FRAME_POINTER_REGNUM) + emit_insn (gen_rtx_SET (VOIDmode, + adjust_address (dest, SImode, UNITS_PER_WORD), + src1)); + else + { + rtx new_mem; + rtx scratch_reg_r0 = gen_rtx_REG (SImode, 0); + + /* We need a scratch register to hold the value of 'address + 4'. + We use r0 for this purpose. It is used for example for long + jumps and is already marked to not be used by normal register + allocation. */ + emit_insn (gen_movsi_internal (scratch_reg_r0, addr)); + emit_insn (gen_addsi_small_int (scratch_reg_r0, scratch_reg_r0, + GEN_INT (UNITS_PER_WORD))); + new_mem = gen_rtx_MEM (SImode, scratch_reg_r0); + MEM_COPY_ATTRIBUTES (new_mem, dest); + emit_move_insn (new_mem, src1); + emit_insn (gen_blockage ()); + } + } + else + /* This should have been prevented by the constraints on movdi_insn. */ + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + + return val; +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +fr30_frame_pointer_required (void) +{ + return (flag_omit_frame_pointer == 0 || crtl->args.pretend_args_size > 0); +} + +/*}}}*/ +/*{{{ Trampoline Output Routines */ + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + On the FR30, the trampoline is: + + nop + ldi:32 STATIC, r12 + nop + ldi:32 FUNCTION, r0 + jmp @r0 + + The no-ops are to guarantee that the static chain and final + target are 32 bit aligned within the trampoline. That allows us to + initialize those locations with simple SImode stores. The alternative + would be to use HImode stores. */ + +static void +fr30_asm_trampoline_template (FILE *f) +{ + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [STATIC_CHAIN_REGNUM]); + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [COMPILER_SCRATCH_REGISTER]); + fprintf (f, "\tjmp\t@%s\n", reg_names [COMPILER_SCRATCH_REGISTER]); +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
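+   Assuming the 2-byte nop and 6-byte ldi:32 encodings implied by the 18 byte
+   TRAMPOLINE_SIZE, the 32-bit immediates of the two ldi:32 instructions in
+   the template above fall at byte offsets 4 and 12, which is where the
+   static chain and the target function address are stored below.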
*/ + +static void +fr30_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, 4); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 12); + emit_move_insn (mem, fnaddr); +} + +/*}}}*/ +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.h b/gcc-4.9/gcc/config/fr30/fr30.h new file mode 100644 index 000000000..ff3115af6 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.h @@ -0,0 +1,845 @@ +/*{{{ Comment. */ + +/* Definitions of FR30 target. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/*}}}*/ +/*{{{ Run-time target specifications. */ + +#undef ASM_SPEC +#define ASM_SPEC "" + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("fr30"); \ + builtin_assert ("machine=fr30"); \ + } \ + while (0) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s" + +/* Include the OS stub library, so that the code can be simulated. + This is not the right way to do this. Ideally this kind of thing + should be done in the linker script - but I have not worked out how + to specify the location of a linker script in a gcc command line yet... */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!mno-lsim:-lsim} crtend.o%s crtn.o%s" + +#undef LIB_SPEC +#define LIB_SPEC "-lc" + +#undef LINK_SPEC +#define LINK_SPEC "%{h*} %{v:-V} \ + %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}" + +/*}}}*/ +/*{{{ Storage Layout. */ + +#define BITS_BIG_ENDIAN 1 + +#define BYTES_BIG_ENDIAN 1 + +#define WORDS_BIG_ENDIAN 1 + +#define UNITS_PER_WORD 4 + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + (MODE) = SImode; \ + } \ + while (0) + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY 32 + +#define FUNCTION_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 32 + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define STRICT_ALIGNMENT 1 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/*}}}*/ +/*{{{ Layout of Source Language Data Types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/*}}}*/ +/*{{{ REGISTER BASICS. */ + +/* Number of hardware registers known to the compiler. They receive numbers 0 + through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number + really is assigned the number `FIRST_PSEUDO_REGISTER'. */ +#define FIRST_PSEUDO_REGISTER 21 + +/* Fixed register assignments: */ + +/* Here we do a BAD THING - reserve a register for use by the machine + description file. There are too many places in compiler where it + assumes that it can issue a branch or jump instruction without + providing a scratch register for it, and reload just cannot cope, so + we keep a register back for these situations. */ +#define COMPILER_SCRATCH_REGISTER 0 + +/* The register that contains the result of a function call. */ +#define RETURN_VALUE_REGNUM 4 + +/* The first register that can contain the arguments to a function. */ +#define FIRST_ARG_REGNUM 4 + +/* A call-used register that can be used during the function prologue. */ +#define PROLOGUE_TMP_REGNUM COMPILER_SCRATCH_REGISTER + +/* Register numbers used for passing a function's static chain pointer. If + register windows are used, the register number as seen by the called + function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as + seen by the calling function is `STATIC_CHAIN_REGNUM'. If these registers + are the same, `STATIC_CHAIN_INCOMING_REGNUM' need not be defined. + + The static chain register need not be a fixed register. + + If the static chain is passed in memory, these macros should not be defined; + instead, the next two macros should be defined. */ +#define STATIC_CHAIN_REGNUM 12 +/* #define STATIC_CHAIN_INCOMING_REGNUM */ + +/* An FR30 specific hardware register. */ +#define ACCUMULATOR_REGNUM 13 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ +#define FRAME_POINTER_REGNUM 14 + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ +#define STACK_POINTER_REGNUM 15 + +/* The following a fake hard registers that describe some of the dedicated + registers on the FR30. */ +#define CONDITION_CODE_REGNUM 16 +#define RETURN_POINTER_REGNUM 17 +#define MD_HIGH_REGNUM 18 +#define MD_LOW_REGNUM 19 + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. These would include the stack pointer, the frame pointer + (except on machines where that can be used as a general register when no + frame pointer is needed), the program counter on machines where that is + considered one of the addressable registers, and any other numbered register + with a standard use. + + This information is expressed as a sequence of numbers, separated by commas + and surrounded by braces. 
The Nth number is 1 if register N is fixed, 0 + otherwise. + + The table initialized from this macro, and the table initialized by the + following one, may be overridden at run time either automatically, by the + actions of the macro `TARGET_CONDITIONAL_REGISTER_USAGE', or by the user + with the command options `-ffixed-REG', `-fcall-used-REG' and + `-fcall-saved-REG'. */ +#define FIXED_REGISTERS \ + { 1, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* XXX - MDL and MDH set as fixed for now - this is until I can get the + mul patterns working. */ + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. + + If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically + saves it on function entry and restores it on function exit, if the register + is used within the function. */ +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, /* 0 - 7 */ \ + 0, 0, 0, 0, 1, 1, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. This is what translates register numbers + in the compiler into assembler language. */ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "ac", "fp", "sp", \ + "cc", "rp", "mdh", "mdl", "ap" \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. */ +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"r13", 13}, {"r14", 14}, {"r15", 15}, {"usp", 15}, {"ps", 16}\ +} + +/*}}}*/ +/*{{{ How Values Fit in Registers. */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) 1 + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/*}}}*/ +/*{{{ Register Classes. */ + +/* An enumeral type that must be defined with all the register class names as + enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last + register class, followed by one more enumeral value, `LIM_REG_CLASSES', + which is not a register class but rather tells how many classes there are. + + Each register class has a number, which is the value of casting the class + name to type `int'. The number serves as an index in many of the tables + described below. 
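+   As a worked example, the REG_CLASS_CONTENTS masks below evaluate to
+   0x000ff for LOW_REGS (r0-r7), 0x0ff00 for HIGH_REGS (r8-r15), 0x0ffff for
+   REAL_REGS (r0-r15), 1 << 19 for MULTIPLY_32_REG (mdl),
+   (1 << 18) | (1 << 19) for MULTIPLY_64_REG (mdh/mdl) and 0x1fffff for
+   ALL_REGS (all 21 registers).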
*/ +enum reg_class +{ + NO_REGS, + MULTIPLY_32_REG, /* the MDL register as used by the MULH, MULUH insns */ + MULTIPLY_64_REG, /* the MDH,MDL register pair as used by MUL and MULU */ + LOW_REGS, /* registers 0 through 7 */ + HIGH_REGS, /* registers 8 through 15 */ + REAL_REGS, /* i.e. all the general hardware registers on the FR30 */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS REAL_REGS +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "MULTIPLY_32_REG", \ + "MULTIPLY_64_REG", \ + "LOW_REGS", \ + "HIGH_REGS", \ + "REAL_REGS", \ + "ALL_REGS" \ + } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0 }, \ + { 1 << MD_LOW_REGNUM }, \ + { (1 << MD_LOW_REGNUM) | (1 << MD_HIGH_REGNUM) }, \ + { (1 << 8) - 1 }, \ + { ((1 << 8) - 1) << 8 }, \ + { (1 << CONDITION_CODE_REGNUM) - 1 }, \ + { (1 << FIRST_PSEUDO_REGISTER) - 1 } \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +#define REGNO_REG_CLASS(REGNO) \ + ( (REGNO) < 8 ? LOW_REGS \ + : (REGNO) < CONDITION_CODE_REGNUM ? HIGH_REGS \ + : (REGNO) == MD_LOW_REGNUM ? MULTIPLY_32_REG \ + : (REGNO) == MD_HIGH_REGNUM ? MULTIPLY_64_REG \ + : ALL_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS REAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). */ +#define INDEX_REG_CLASS REAL_REGS + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. */ +#define REGNO_OK_FOR_BASE_P(NUM) 1 + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. + + The difference between an index register and a base register is that the + index register may be scaled. If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. 
+ The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REGNO_OK_FOR_INDEX_P(NUM) 1 + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + + This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value + of the macro `CLASS_MAX_NREGS (CLASS, MODE)' should be the maximum value of + `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class CLASS. + + This macro helps control the handling of multiple-word values in + the reload pass. */ +#define CLASS_MAX_NREGS(CLASS, MODE) HARD_REGNO_NREGS (0, MODE) + +/*}}}*/ +/*{{{ Basic Stack Layout. */ + +/* Define this macro if pushing a word onto the stack moves the stack pointer + to a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to macro nonzero if the addresses of local variable slots + are at negative offsets from the frame pointer. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset from the frame pointer to the first local variable slot to be + allocated. + + If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the + first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by + adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */ +/* #define STARTING_FRAME_OFFSET -4 */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. If not specified, the default value of zero + is used. This is the proper value for most machines. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + location at which outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET 0 + +/* Offset from the argument pointer register to the first argument's address. + On some machines it may depend on the data type of the function. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + argument's address. */ +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. + + You only need to define this macro if you want to support call frame + debugging information like that provided by DWARF 2. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM) + +/*}}}*/ +/*{{{ Register That Address the Stack Frame. */ + +/* The register number of the arg pointer register, which is used to access the + function's argument list. On some machines, this is the same as the frame + pointer register. On some machines, the hardware determines which register + this is. On other machines, you can choose any register you wish for this + purpose. If this is not the same register as the frame pointer register, + then you must mark it as a fixed register according to `FIXED_REGISTERS', or + arrange to be able to eliminate it. */ +#define ARG_POINTER_REGNUM 20 + +/*}}}*/ +/*{{{ Eliminating the Frame Pointer and the Arg Pointer. */ + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. 
If it is not defined, + the only elimination attempted by the compiler is to replace references to + the frame pointer with references to the stack pointer. + + The definition of this macro is a list of structure initializations, each of + which specifies an original and replacement register. + + On some machines, the position of the argument pointer is not known until + the compilation is completed. In such a case, a separate hard register must + be used for the argument pointer. This register can be eliminated by + replacing it with either the frame pointer or the argument pointer, + depending on whether or not the frame pointer has been eliminated. + + In this case, you might specify: + #define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + + Note that the elimination of the argument pointer with the stack pointer is + specified first since that is the preferred elimination. */ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ +} + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the + initial difference between the specified pair of registers. This macro must + be defined if `ELIMINABLE_REGS' is defined. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = fr30_compute_frame_size (FROM, TO) + +/*}}}*/ +/*{{{ Passing Function Arguments on the Stack. */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed onto the + stack for each call; instead, the function prologue should increase the + stack frame size by this amount. + + Defining both `PUSH_ROUNDING' and `ACCUMULATE_OUTGOING_ARGS' is not + proper. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*}}}*/ +/*{{{ Function Arguments in Registers. */ + +/* The number of register assigned to holding function arguments. */ + +#define FR30_NUM_ARG_REGS 4 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. + + There is no need to record in `CUMULATIVE_ARGS' anything about the arguments + that have been passed on the stack. The compiler has other variables to + keep track of that. For target machines on which all arguments are passed + on the stack, there is no need to store anything in `CUMULATIVE_ARGS'; + however, the data structure must exist and should not be empty, so use + `int'. */ +/* On the FR30 this value is an accumulating count of the number of argument + registers that have been filled with argument values, as opposed to say, + the number of bytes of argument accumulated so far. */ +#define CUMULATIVE_ARGS int + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. The variable has type + `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type + of the function which will receive the args, or 0 if the args are to a + compiler support library function. The value of INDIRECT is nonzero when + processing an indirect call, for example a call through a function pointer. 
+ The value of INDIRECT is zero for a call to an explicitly named function, a + library function call, or when `INIT_CUMULATIVE_ARGS' is used to find + arguments for the function being compiled. + + When processing a call to a compiler support library function, LIBNAME + identifies which one. It is a `symbol_ref' rtx which contains the name of + the function, as a string. LIBNAME is 0 when an ordinary C function call is + being processed. Thus, each time this macro is called, either LIBNAME or + FNTYPE is nonzero, but never both of them at once. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) < FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS)) + +/*}}}*/ +/*{{{ How Large Values are Returned. */ + +/* Define this macro to be 1 if all structure and union return values must be + in memory. Since this results in slower code, this should be defined only + if needed for compatibility with other compilers or with an ABI. If you + define this macro to be 0, then the conventions used for structure and union + return values are decided by the `TARGET_RETURN_IN_MEMORY' macro. + + If not defined, this defaults to the value 1. */ +#define DEFAULT_PCC_STRUCT_RETURN 1 + +/*}}}*/ +/*{{{ Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. Before calling, the assembler code + must load the address of a counter variable into a register where `mcount' + expects to find the address. The name of this variable is `LP' followed by + the number LABELNO, so you would generate the name using `LP%d' in a + `fprintf'. + + The details of how the address should be passed to `mcount' are determined + by your operating system environment, not by GCC. To figure them out, + compile a small program for profiling using the system's installed C + compiler and look at the assembler code that results. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ +{ \ + fprintf (FILE, "\t mov rp, r1\n" ); \ + fprintf (FILE, "\t ldi:32 mcount, r0\n" ); \ + fprintf (FILE, "\t call @r0\n" ); \ + fprintf (FILE, ".word\tLP%d\n", LABELNO); \ +} + +/*}}}*/ +/*{{{ Trampolines for Nested Functions. */ + +/* A C expression for the size in bytes of the trampoline, as an integer. */ +#define TRAMPOLINE_SIZE 18 + +/* We want the trampoline to be aligned on a 32bit boundary so that we can + make sure the location of the static chain & target function within + the trampoline is also aligned on a 32bit boundary. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/*}}}*/ +/*{{{ Addressing Modes. */ + +/* A number, the maximum number of registers that can appear in a valid memory + address. Note that it is up to you to specify a value equal to the maximum + number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. 
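+   As an illustration of the ranges accepted below: a stack pointer base may
+   take an unsigned offset of 0 to 60 bytes ((1 << 6) - 4), while a frame
+   pointer (or, in the non-strict variant, argument pointer) base may take a
+   signed offset of -512 to +508 bytes; both forms are only accepted for
+   SImode and SFmode accesses.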
*/ + +/* On the FR30 we only have one real addressing mode - an address in a + register. There are three special cases however: + + * indexed addressing using small positive offsets from the stack pointer + + * indexed addressing using small signed offsets from the frame pointer + + * register plus register addressing using R13 as the base register. + + At the moment we only support the first two of these special cases. */ + +#ifdef REG_OK_STRICT +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#else +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && (REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + || REGNO (XEXP (X, 0)) == ARG_POINTER_REGNUM) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. For hard registers, it should always accept those + which the hardware permits and reject the others. Whether the macro accepts + or rejects pseudo registers must be controlled by `REG_OK_STRICT' as + described above. This usually requires two variant definitions, of which + `REG_OK_STRICT' controls the one actually used. */ +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) (((unsigned) REGNO (X)) <= STACK_POINTER_REGNUM) +#else +#define REG_OK_FOR_BASE_P(X) 1 +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. + + The difference between an index register and a base register is that the + index register may be scaled. If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. + The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +/*}}}*/ +/*{{{ Describing Relative Costs of Operations */ + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. 
a `char' or a `short') is no faster than accessing a + word of memory, i.e., if such access require more than one instruction or if + there is no difference in cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by finding + the smallest containing object; when it is defined, a fullword load will be + used if alignment permits. Unless bytes accesses are faster than word + accesses, using word accesses is preferable since it may eliminate + subsequent memory access if subsequent accesses occur to other fields in the + same word of the structure, but to different bytes. */ +#define SLOW_BYTE_ACCESS 1 + +/*}}}*/ +/*{{{ Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. Normally `".text"' is + right. */ +#define TEXT_SECTION_ASM_OP "\t.text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. Normally + `".data"' is right. */ +#define DATA_SECTION_ASM_OP "\t.data" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/*}}}*/ +/*{{{ The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ +#define ASM_COMMENT_START ";" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. Normally this is `"#APP"', which is a comment + that has no effect on most assemblers but tells the GNU assembler that it + must check the lines that follow for all valid assembler constructs. */ +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. Normally this is `"#NO_APP"', which tells the + GNU assembler to resume making the time-saving assumptions that are valid + for ordinary compiler output. */ +#define ASM_APP_OFF "#NO_APP\n" + +/*}}}*/ +/*{{{ Output and Generation of Labels. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/*}}}*/ +/*{{{ Output of Assembler Instructions. */ + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X. X is an RTL expression. + + CODE is a value that can be used to specify one of several ways of printing + the operand. It is used when identical operands must be printed differently + depending on the context. CODE comes from the `%' specification that was + used to request printing of the operand. If the specification was just + `%DIGIT' then CODE is 0; if the specification was `%LTR DIGIT' then CODE is + the ASCII code for LTR. + + If X is a register, this macro should print the register's name. The names + can be found in an array `reg_names' whose type is `char *[]'. `reg_names' + is initialized from `REGISTER_NAMES'. + + When the machine description has a specification `%PUNCT' (a `%' followed by + a punctuation character), this macro is called with a null pointer for X and + the punctuation character for CODE. */ +#define PRINT_OPERAND(STREAM, X, CODE) fr30_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. 
If + `PRINT_OPERAND_PUNCT_VALID_P' is not defined, it means that no punctuation + characters (except for the standard one, `%') are used in this way. */ +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#') + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) fr30_print_operand_address (STREAM, X) + +#define REGISTER_PREFIX "%" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + +/*}}}*/ +/*{{{ Output of Dispatch Tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a difference between two labels. + VALUE and REL are the numbers of two internal labels. The definitions of + these labels are output using `(*targetm.asm_out.internal_label)', and they must be + printed in the same way here. For example, + + fprintf (STREAM, "\t.word L%d-L%d\n", VALUE, REL) */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ +fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL) + +/* This macro should be provided on machines where the addresses in a dispatch + table are absolute. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a reference to a label. VALUE + is the number of an internal label whose definition is output using + `(*targetm.asm_out.internal_label)'. For example, + + fprintf (STREAM, "\t.word L%d\n", VALUE) */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ +fprintf (STREAM, "\t.word .L%d\n", VALUE) + +/*}}}*/ +/*{{{ Assembler Commands for Alignment. */ + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. POWER + will be a C expression of type `int'. */ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf ((STREAM), "\t.p2align %d\n", (POWER)) + +/*}}}*/ +/*{{{ Miscellaneous Parameters. */ + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ +#define CASE_VECTOR_MODE SImode + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. + + On many machines, this expression can be 1. + + When `TRULY_NOOP_TRUNCATION' returns 1 for a pair of sizes for modes for + which `MODES_TIEABLE_P' is 0, suboptimal code can result. If this is the + case, making `TRULY_NOOP_TRUNCATION' return 0 in such cases may improve + things. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* An alias for the machine mode for pointers. On most machines, define this + to be the integer mode corresponding to the width of a hardware pointer; + `SImode' on 32-bit machine or `DImode' on 64-bit machines. On some machines + you must define this to be one of the partial integer modes, such as + `PSImode'. + + The width of `Pmode' must be at least as large as the value of + `POINTER_SIZE'. 
If it is not equal, you must define the macro + `POINTERS_EXTEND_UNSIGNED' to specify how pointers are extended to `Pmode'. */ +#define Pmode SImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. On most machines this should be + `QImode'. */ +#define FUNCTION_MODE QImode + +/*}}}*/ + +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.md b/gcc-4.9/gcc/config/fr30/fr30.md new file mode 100644 index 000000000..8bbd77c69 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.md @@ -0,0 +1,1267 @@ +;; FR30 machine description. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Cygnus Solutions. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;;{{{ Attributes + +(define_attr "length" "" (const_int 2)) + +;; Used to distinguish between small memory model targets and big mode targets. + +(define_attr "size" "small,big" + (const (if_then_else (symbol_ref "TARGET_SMALL_MODEL") + (const_string "small") + (const_string "big")))) + + +;; Define an attribute to be used by the delay slot code. +;; An instruction by default is considered to be 'delayable' +;; that is, it can be placed into a delay slot, but it is not +;; itself a delayed branch type instruction. An instruction +;; whose type is 'delayed' is one which has a delay slot, and +;; an instruction whose delay_type is 'other' is one which does +;; not have a delay slot, nor can it be placed into a delay slot. + +(define_attr "delay_type" "delayable,delayed,other" (const_string "delayable")) + +;;}}} +;;{{{ Delay Slot Specifications + +(define_delay (eq_attr "delay_type" "delayed") + [(and (eq_attr "delay_type" "delayable") + (eq_attr "length" "2")) + (nil) + (nil)] +) + +(include "predicates.md") +(include "constraints.md") + +;;}}} +;;{{{ Moves + +;;{{{ Comment + +;; Wrap moves in define_expand to prevent memory->memory moves from being +;; generated at the RTL level, which generates better code for most machines +;; which can't do mem->mem moves. + +;; If operand 0 is a `subreg' with mode M of a register whose own mode is wider +;; than M, the effect of this instruction is to store the specified value in +;; the part of the register that corresponds to mode M. The effect on the rest +;; of the register is undefined. + +;; This class of patterns is special in several ways. First of all, each of +;; these names *must* be defined, because there is no other way to copy a datum +;; from one place to another. + +;; Second, these patterns are not used solely in the RTL generation pass. Even +;; the reload pass can generate move insns to copy values from stack slots into +;; temporary registers. When it does so, one of the operands is a hard +;; register and the other is an operand that can need to be reloaded into a +;; register. 
+ +;; Therefore, when given such a pair of operands, the pattern must +;; generate RTL which needs no reloading and needs no temporary +;; registers--no registers other than the operands. For example, if +;; you support the pattern with a `define_expand', then in such a +;; case the `define_expand' mustn't call `force_reg' or any other such +;; function which might generate new pseudo registers. + +;; This requirement exists even for subword modes on a RISC machine +;; where fetching those modes from memory normally requires several +;; insns and some temporary registers. Look in `spur.md' to see how +;; the requirement can be satisfied. + +;; During reload a memory reference with an invalid address may be passed as an +;; operand. Such an address will be replaced with a valid address later in the +;; reload pass. In this case, nothing may be done with the address except to +;; use it as it stands. If it is copied, it will not be replaced with a valid +;; address. No attempt should be made to make such an address into a valid +;; address and no routine (such as `change_address') that will do so may be +;; called. Note that `general_operand' will fail when applied to such an +;; address. +;; +;; The global variable `reload_in_progress' (which must be explicitly declared +;; if required) can be used to determine whether such special handling is +;; required. +;; +;; The variety of operands that have reloads depends on the rest of +;; the machine description, but typically on a RISC machine these can +;; only be pseudo registers that did not get hard registers, while on +;; other machines explicit memory references will get optional +;; reloads. +;; +;; If a scratch register is required to move an object to or from memory, it +;; can be allocated using `gen_reg_rtx' prior to reload. But this is +;; impossible during and after reload. If there are cases needing scratch +;; registers after reload, you must define `SECONDARY_INPUT_RELOAD_CLASS' and +;; perhaps also `SECONDARY_OUTPUT_RELOAD_CLASS' to detect them, and provide +;; patterns `reload_inM' or `reload_outM' to handle them. + +;; The constraints on a `moveM' must permit moving any hard register to any +;; other hard register provided that `HARD_REGNO_MODE_OK' permits mode M in +;; both registers and `REGISTER_MOVE_COST' applied to their classes returns a +;; value of 2. + +;; It is obligatory to support floating point `moveM' instructions +;; into and out of any registers that can hold fixed point values, +;; because unions and structures (which have modes `SImode' or +;; `DImode') can be in those registers and they may have floating +;; point members. + +;; There may also be a need to support fixed point `moveM' instructions in and +;; out of floating point registers. Unfortunately, I have forgotten why this +;; was so, and I don't know whether it is still true. If `HARD_REGNO_MODE_OK' +;; rejects fixed point values in floating point registers, then the constraints +;; of the fixed point `moveM' instructions must be designed to avoid ever +;; trying to reload into a floating point register. 
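+
+;; As a concrete illustration (register numbers are arbitrary): because the
+;; move expanders below force one side of a memory-to-memory move through a
+;; register, a C assignment such as "*dst = *src" in SImode becomes a load
+;; such as "ld @r5, r4" followed by a store such as "st r4, @r6", rather
+;; than a single mem->mem move, which the FR30 does not have.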
+ +;;}}} +;;{{{ Push and Pop + +;; Push a register onto the stack +(define_insn "movsi_push" + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "register_operand" "a"))] + "" + "st %0, @-r15" +) + +;; Pop a register off the stack +(define_insn "movsi_pop" + [(set (match_operand:SI 0 "register_operand" "=a") + (mem:SI (post_inc:SI (reg:SI 15))))] + "" + "ld @r15+, %0" +) + +;;}}} +;;{{{ 1 Byte Moves + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], QImode))) + operands[1] = copy_to_mode_reg (QImode, operands[1]); +}") + +(define_insn "movqi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "memory_operand" "m")))] + "" + "ldub %1, %0" +) + +(define_expand "movqi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movqi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendqisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,red,m,r") + (match_operand:QI 1 "general_operand" "i,red,r,rm"))] + "" + "@ + ldi:8\\t#%A1, %0 + mov \\t%1, %0 + stb \\t%1, %0 + ldub \\t%1, %0" +) + +;;}}} +;;{{{ 2 Byte Moves + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], HImode))) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_insn "movhi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))] + "" + "lduh %1, %0" +) + +(define_expand "movhi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movhi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendhisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,red,m,r") + (match_operand:HI 1 "general_operand" "L,M,n,red,r,rm"))] + "" + "@ + ldi:8 \\t#%1, %0 + ldi:20\\t#%1, %0 + ldi:32\\t#%1, %0 + mov \\t%1, %0 + sth \\t%1, %0 + lduh \\t%1, %0" + [(set_attr "length" "*,4,6,*,*,*")] +) + +;;}}} +;;{{{ 4 Byte Moves + +;; If the destination is a MEM and the source is a +;; MEM or an CONST_INT move the source into a register. +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress + && !reload_completed + && GET_CODE(operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], SImode))) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + }" +) + +;; We can do some clever tricks when loading certain immediate +;; values. 
We implement these tricks as define_splits, rather +;; than putting the code into the define_expand "movsi" above, +;; because if we put them there, they will be evaluated at RTL +;; generation time and then the combiner pass will come along +;; and replace the multiple insns that have been generated with +;; the original, slower, load insns. (The combiner pass only +;; cares about reducing the number of instructions, it does not +;; care about instruction lengths or speeds). Splits are +;; evaluated after the combine pass and before the scheduling +;; passes, so that they are the perfect place to put this +;; intelligence. +;; +;; XXX we probably ought to implement these for QI and HI mode +;; loads as well. + +;; If we are loading a small negative constant we can save space +;; and time by loading the positive value and then sign extending it. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "INTVAL (operands[1]) <= -1 && INTVAL (operands[1]) >= -128 + && (GET_CODE (operands[0]) != SUBREG + || SCALAR_INT_MODE_P (GET_MODE (XEXP (operands[0], 0))))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "{ + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + operands[2] = gen_lowpart (QImode, operands[0]); + }" +) + +;; If we are loading a large negative constant, one which does +;; not have any of its bottom 24 bit set, then we can save time +;; and space by loading the byte value and shifting it into place. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) < 0) && ((INTVAL (operands[1]) & 0x00ffffff) == 0)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 24))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + operands[2] = GEN_INT (val >> 24); + }" +) + +;; If we are loading a large positive constant, one which has bits +;; in the top byte set, but whose set bits all lie within an 8 bit +;; range, then we can save time and space by loading the byte value +;; and shifting it into place. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) > 0x00ffffff) + && ((INTVAL (operands[1]) >> exact_log2 (INTVAL (operands[1]) & (- INTVAL (operands[1])))) < 0x100)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + int shift = exact_log2 (val & ( - val)); + operands[2] = GEN_INT (val >> shift); + operands[3] = GEN_INT (shift); + }" +) + +;; When TARGET_SMALL_MODEL is defined we assume that all symbolic +;; values are addresses which will fit in 20 bits. 
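+
+;; As an example of the first split above (register numbers are arbitrary):
+;; loading the constant -5 is rewritten as "ldi:8 #251, r4" followed by
+;; "extsb r4" (4 bytes in total) instead of a single 6-byte "ldi:32 #-5, r4".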
+ +(define_insn "movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,red,V,r,m") + (match_operand:SI 1 "general_operand" "L,M,n,i,rde,r,rm,r"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:8 \\t#%1, %0\"; + case 1: return \"ldi:20\\t#%1, %0\"; + case 2: return \"ldi:32\\t#%1, %0\"; + case 3: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 4: return \"mov \\t%1, %0\"; + case 5: return \"st \\t%1, %0\"; + case 6: return \"ld \\t%1, %0\"; + case 7: return \"st \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 4) + (eq_attr "alternative" "2") (const_int 6) + (eq_attr "alternative" "3") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +;;}}} +;;{{{ 8 Byte Moves + +;; Note - the FR30 does not have an 8 byte load/store instruction +;; but we have to support this pattern because some other patterns +;; (e.g. muldisi2) can produce a DImode result. +;; (This code is stolen from the M32R port.) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DImode, operands[1]); + " +) + +;; We use an insn and a split so that we can generate +;; RTL rather than text from fr30_move_double(). + +(define_insn "*movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,m,r") + (match_operand:DI 1 "di_operand" "r,m,r,nF"))] + "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)" + "#" + [(set_attr "length" "4,8,12,12")] +) + +(define_split + [(set (match_operand:DI 0 "nonimmediate_di_operand" "") + (match_operand:DI 1 "di_operand" ""))] + "reload_completed" + [(match_dup 2)] + "operands[2] = fr30_move_double (operands);" +) + +;;}}} +;;{{{ Load & Store Multiple Registers + +;; The load multiple and store multiple patterns are implemented +;; as peepholes because the only time they are expected to occur +;; is during function prologues and epilogues. 
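+
+;; For instance, a prologue that pushes r11, r10, r9 and r8 with four
+;; individual "st rX, @-r15" instructions is matched by the first peephole
+;; below and emitted as a single "stm1 (r11, r10, r9, r8)".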
+ +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 3 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 4, 0)" + "ldm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 3, 0)" + "ldm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 2, 0)" + "ldm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm0 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm0 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 
"low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm0 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +;;}}} +;;{{{ Floating Point Moves + +;; Note - Patterns for SF mode moves are compulsory, but +;; patterns for DF are optional, as GCC can synthesize them. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress && !reload_completed + && memory_operand (operands[0], SFmode) + && memory_operand (operands[1], SFmode)) + operands[1] = copy_to_mode_reg (SFmode, operands[1]); + }" +) + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,red,m,r") + (match_operand:SF 1 "general_operand" "Fn,i,rde,r,rm"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:32\\t%1, %0\"; + case 1: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 2: return \"mov \\t%1, %0\"; + case 3: return \"st \\t%1, %0\"; + case 4: return \"ld \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "0") (const_int 6) + (eq_attr "alternative" "1") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +(define_insn "*movsf_constant_store" + [(set (match_operand:SF 0 "memory_operand" "=m") + (match_operand:SF 1 "immediate_operand" "F"))] + "" + "* + { + const char * ldi_instr; + const char * tmp_reg; + static char buffer[100]; + + ldi_instr = fr30_const_double_is_zero (operands[1]) ? \"ldi:8\" : \"ldi:32\"; + + tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER]; + + sprintf (buffer, \"%s\\t#%%1, %s\\t;\\n\\tst\\t%s, %%0\\t; Created by movsf_constant_store\", + ldi_instr, tmp_reg, tmp_reg); + + return buffer; + }" + [(set_attr "length" "8")] +) + +;;}}} + +;;}}} +;;{{{ Conversions + +;; Signed conversions from a smaller integer to a larger integer + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extsb %0" +) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extsh %0" +) + +;; Unsigned conversions from a smaller integer to a larger integer + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extub %0" +) + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extuh %0" +) + +;;}}} +;;{{{ Arithmetic + +;;{{{ Addition + +;; This is a special pattern just for adjusting the stack size. +(define_insn "add_to_stack" + [(set (reg:SI 15) + (plus:SI (reg:SI 15) + (match_operand:SI 0 "stack_add_operand" "i")))] + "" + "addsp %0" +) + +;; We need some trickery to be able to handle the addition of +;; large (i.e. outside +/- 16) constants. We need to be able to +;; handle this because reload assumes that it can generate add +;; instructions with arbitrary sized constants. 
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (plus:SI (match_operand:SI 1 "register_operand" "")
+                 (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "{
+     if (   GET_CODE (operands[2]) == REG
+         || GET_CODE (operands[2]) == SUBREG)
+       emit_insn (gen_addsi_regs (operands[0], operands[1], operands[2]));
+     else if (GET_CODE (operands[2]) != CONST_INT)
+       emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+     else if (INTVAL (operands[2]) >= -16
+              && INTVAL (operands[2]) <= 15
+              && (!REG_P (operands[1])
+                  || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+                  || REGNO (operands[1]) == STACK_POINTER_REGNUM))
+       emit_insn (gen_addsi_small_int (operands[0], operands[1], operands[2]));
+     else
+       emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+     DONE;
+  }"
+)
+
+(define_insn "addsi_regs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (plus:SI (match_operand:SI 1 "register_operand" "%0")
+                 (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "addn %2, %0"
+)
+
+;; Do not allow an eliminable register as the source register.  It
+;; might be eliminated in favor of the stack pointer, probably
+;; increasing the offset, and so rendering the instruction illegal.
+(define_insn "addsi_small_int"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+        (plus:SI (match_operand:SI 1 "register_operand" "0,0")
+                 (match_operand:SI 2 "add_immediate_operand" "I,J")))]
+  "!REG_P (operands[1])
+   || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+   || REGNO (operands[1]) == STACK_POINTER_REGNUM"
+  "@
+   addn %2, %0
+   addn2 %2, %0"
+)
+
+(define_expand "addsi_big_int"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (plus:SI (match_operand:SI 1 "register_operand" "")
+                 (match_operand:SI 2 "immediate_operand" "")))]
+  ""
+  "{
+     /* Cope with the possibility that ops 0 and 1 are the same register.  */
+     if (rtx_equal_p (operands[0], operands[1]))
+       {
+         if (reload_in_progress || reload_completed)
+           {
+             rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+             emit_insn (gen_movsi (reg, operands[2]));
+             emit_insn (gen_addsi_regs (operands[0], operands[0], reg));
+           }
+         else
+           {
+             operands[2] = force_reg (SImode, operands[2]);
+             emit_insn (gen_addsi_regs (operands[0], operands[0], operands[2]));
+           }
+       }
+     else
+       {
+         emit_insn (gen_movsi (operands[0], operands[2]));
+         emit_insn (gen_addsi_regs (operands[0], operands[0], operands[1]));
+       }
+     DONE;
+  }"
+)
+
+(define_insn "*addsi_for_reload"
+  [(set (match_operand:SI 0 "register_operand" "=&r,r,r")
+        (plus:SI (match_operand:SI 1 "register_operand" "r,r,r")
+                 (match_operand:SI 2 "immediate_operand" "L,M,n")))]
+  "reload_in_progress || reload_completed"
+  "@
+   ldi:8\\t#%2, %0 \\n\\taddn\\t%1, %0
+   ldi:20\\t#%2, %0 \\n\\taddn\\t%1, %0
+   ldi:32\\t#%2, %0 \\n\\taddn\\t%1, %0"
+  [(set_attr "length" "4,6,8")]
+)
+
+;;}}}
+;;{{{ Subtraction
+
+(define_insn "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (minus:SI (match_operand:SI 1 "register_operand" "0")
+                  (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "subn %2, %0"
+)
+
+;;}}}
+;;{{{ Multiplication
+
+;; Signed multiplication producing 64-bit results from 32-bit inputs
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+                 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mul %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+  [(set_attr "length" "6")]
+)
+
+;; Unsigned multiplication producing 64-bit results from 32-bit inputs
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+                 (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mulu %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+  [(set_attr "length" "6")]
+)
+
+;; Signed multiplication producing 32-bit result from 16-bit inputs
+(define_insn "mulhisi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+                 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mulh %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;; Unsigned multiplication producing 32-bit result from 16-bit inputs
+(define_insn "umulhisi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+                 (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "muluh %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;; Signed multiplication producing 32-bit result from 32-bit inputs
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (match_operand:SI 1 "register_operand" "%r")
+                 (match_operand:SI 2 "register_operand" "r")))
+   (clobber (reg:CC 16))]
+  ""
+  "mul %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;;}}}
+;;}}}
+;;{{{ Shifts
+
+;; Arithmetic Shift Left
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                   (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   lsl %2, %0
+   lsl %2, %0
+   lsl2 %x2, %0"
+)
+
+;; Arithmetic Shift Right
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                     (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   asr %2, %0
+   asr %2, %0
+   asr2 %x2, %0"
+)
+
+;; Logical Shift Right
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                     (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   lsr %2, %0
+   lsr %2, %0
+   lsr2 %x2, %0"
+)
+
+;;}}}
+;;{{{ Logical Operations
+
+;; Logical AND, 32-bit integers
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (and:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "and %1, %0"
+)
+
+;; Inclusive OR, 32-bit integers
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "or %1, %0"
+)
+
+;; Exclusive OR, 32-bit integers
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (xor:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "eor %1, %0"
+)
+
+;; One's complement, 32-bit integers
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  "{
+     if (rtx_equal_p (operands[0], operands[1]))
+       {
+         if (reload_in_progress || reload_completed)
+           {
+             rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+             emit_insn (gen_movsi (reg, constm1_rtx));
+             emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+           }
+         else
+           {
+             rtx reg = gen_reg_rtx (SImode);
+
+             emit_insn (gen_movsi (reg, constm1_rtx));
+             emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+           }
+       }
+     else
+       {
+         emit_insn (gen_movsi_internal (operands[0], constm1_rtx));
+         emit_insn (gen_xorsi3 (operands[0], operands[1], operands[0]));
+       }
+     DONE;
+  }"
+)
+
+;;}}}
+;;{{{ Comparisons
+
+;; The actual comparisons, generated by the cbranch and/or cstore expanders
+
+(define_insn "*cmpsi_internal"
+  [(set (reg:CC 16)
+        (compare:CC (match_operand:SI 0 "register_operand" "r,r,r")
+                    (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+  ""
+  "@
+   cmp %1, %0
+   cmp %1, %0
+   cmp2 %1, %0"
+)
+
+;;}}}
+;;{{{ Branches
+
+;; Define_expands called by the machine independent part of the compiler
+;; to allocate a new comparison register
+
+(define_expand "cbranchsi4"
+  [(set (reg:CC 16)
+        (compare:CC (match_operand:SI 1 "register_operand" "")
+                    (match_operand:SI 2 "nonmemory_operand" "")))
+   (set (pc)
+        (if_then_else (match_operator 0 "ordered_comparison_operator"
+                       [(reg:CC 16) (const_int 0)])
+                      (label_ref (match_operand 3 "" ""))
+                      (pc)))]
+  ""
+  ""
+)
+
+
+;; Actual branches.  We must allow for the (label_ref) and the (pc) to be
+;; swapped.  If they are swapped, it reverses the sense of the branch.
+
+;; This pattern matches the (branch-if-true) branches generated above.
+;; It generates two different instruction sequences depending upon how
+;; far away the destination is.
+
+;; The calculation for the instruction length is derived as follows:
+;; The branch instruction has a 9-bit signed displacement so we have
+;; this inequality for the displacement:
+;;
+;;               -256 <= pc < 256
+;; or
+;;         -256 + 256 <= pc + 256 < 256 + 256
+;; i.e.
+;;                 0 <= pc + 256 < 512
+;;
+;; if we consider the displacement as an unsigned value, then negative
+;; displacements become very large positive displacements, and the
+;; inequality becomes:
+;;
+;;            pc + 256 < 512
+;;
+;; In order to allow for the fact that the real branch instruction works
+;; from pc + 2, we increase the offset to 258.
+;;
+;; Note - we do not have to worry about whether the branch is delayed or
+;; not, as branch shortening happens after delay slot reorganization.
+
+(define_insn "*branch_true"
+  [(set (pc)
+        (if_then_else (match_operator 0 "comparison_operator"
+                       [(reg:CC 16)
+                        (const_int 0)])
+                      (label_ref (match_operand 1 "" ""))
+                      (pc)))]
+  ""
+  "*
+  {
+    if (get_attr_length (insn) == 2)
+      return \"b%b0%#\\t%l1\";
+    else
+      {
+        static char buffer [100];
+        const char * tmp_reg;
+        const char * ldi_insn;
+
+        tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+        ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\";
+
+        /* The code produced here is, for say the EQ case:
+
+                Bne  1f
+                LDI